Skip to content

Commit cf56400

Browse files
Merge branch 'feature_tpc_improvement2' of https://github.com/KazunoriSumiya/mct-model-optimization into feature_tpc_improvement2
2 parents ab5d03b + 9b59a13 commit cf56400

File tree

2 files changed

+115
-67
lines changed

2 files changed

+115
-67
lines changed

docs/api/api_docs/classes/Wrapper.html

Lines changed: 114 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -68,35 +68,31 @@ <h3>Navigation</h3>
6868
</tr>
6969
</thead>
7070
<tbody>
71-
<tr class="row-even"><td><p>target_platform_version</p></td>
72-
<td><p>v1</p></td>
73-
<td><p>Target platform version (use_internal_tpc=True)</p></td>
71+
<tr class="row-even"><td><p>sdsp_version</p></td>
72+
<td><p>‘3.14’</p></td>
73+
<td><p>SDSP version for TPC</p></td>
7474
</tr>
75-
<tr class="row-odd"><td><p>tpc_version</p></td>
76-
<td><p>‘5.0’</p></td>
77-
<td><p>TPC version (use_internal_tpc=False)</p></td>
78-
</tr>
79-
<tr class="row-even"><td><p>activation_error_method</p></td>
75+
<tr class="row-odd"><td><p>activation_error_method</p></td>
8076
<td><p>mct.core.QuantizationErrorMethod.MSE</p></td>
81-
<td><p>Activation quantization error method</p></td>
77+
<td><p>Activation quantization error method (low priority)</p></td>
8278
</tr>
83-
<tr class="row-odd"><td><p>weights_bias_correction</p></td>
79+
<tr class="row-even"><td><p>weights_bias_correction</p></td>
8480
<td><p>True</p></td>
85-
<td><p>Enable weights bias correction</p></td>
81+
<td><p>Enable weights bias correction (low priority)</p></td>
8682
</tr>
87-
<tr class="row-even"><td><p>z_threshold</p></td>
83+
<tr class="row-odd"><td><p>z_threshold</p></td>
8884
<td><p>float(‘inf’)</p></td>
89-
<td><p>Z-threshold for quantization</p></td>
85+
<td><p>Z-threshold for quantization (low priority)</p></td>
9086
</tr>
91-
<tr class="row-odd"><td><p>linear_collapsing</p></td>
87+
<tr class="row-even"><td><p>linear_collapsing</p></td>
9288
<td><p>True</p></td>
93-
<td><p>Enable linear layer collapsing</p></td>
89+
<td><p>Enable linear layer collapsing (low priority)</p></td>
9490
</tr>
95-
<tr class="row-even"><td><p>residual_collapsing</p></td>
91+
<tr class="row-odd"><td><p>residual_collapsing</p></td>
9692
<td><p>True</p></td>
97-
<td><p>Enable residual connection collapsing</p></td>
93+
<td><p>Enable residual connection collapsing (low priority)</p></td>
9894
</tr>
99-
<tr class="row-odd"><td><p>save_model_path</p></td>
95+
<tr class="row-even"><td><p>save_model_path</p></td>
10096
<td><p>‘./qmodel.keras’ / ‘./qmodel.onnx’</p></td>
10197
<td><p>Path to save quantized model (Keras/PyTorch)</p></td>
10298
</tr>
@@ -116,27 +112,47 @@ <h3>Navigation</h3>
116112
</tr>
117113
</thead>
118114
<tbody>
119-
<tr class="row-even"><td><p>target_platform_version</p></td>
120-
<td><p>‘v1’</p></td>
121-
<td><p>Target platform version (use_internal_tpc=True)</p></td>
115+
<tr class="row-even"><td><p>sdsp_version</p></td>
116+
<td><p>‘3.14’</p></td>
117+
<td><p>SDSP version for TPC</p></td>
118+
</tr>
119+
<tr class="row-odd"><td><p>activation_error_method</p></td>
120+
<td><p>mct.core.QuantizationErrorMethod.MSE</p></td>
121+
<td><p>Activation quantization error method (low priority)</p></td>
122122
</tr>
123-
<tr class="row-odd"><td><p>tpc_version</p></td>
124-
<td><p>‘5.0’</p></td>
125-
<td><p>TPC version (use_internal_tpc=False)</p></td>
123+
<tr class="row-even"><td><p>weights_bias_correction</p></td>
124+
<td><p>True</p></td>
125+
<td><p>Enable weights bias correction (low priority)</p></td>
126126
</tr>
127-
<tr class="row-even"><td><p>num_of_images</p></td>
127+
<tr class="row-odd"><td><p>z_threshold</p></td>
128+
<td><p>float(‘inf’)</p></td>
129+
<td><p>Z-threshold for quantization (low priority)</p></td>
130+
</tr>
131+
<tr class="row-even"><td><p>linear_collapsing</p></td>
132+
<td><p>True</p></td>
133+
<td><p>Enable linear layer collapsing (low priority)</p></td>
134+
</tr>
135+
<tr class="row-odd"><td><p>residual_collapsing</p></td>
136+
<td><p>True</p></td>
137+
<td><p>Enable residual connection collapsing (low priority)</p></td>
138+
</tr>
139+
<tr class="row-even"><td><p>distance_weighting_method</p></td>
140+
<td><p>See <a class="reference external" href="https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/classes/MixedPrecisionQuantizationConfig.html">MixedPrecisionQuantizationConfig</a></p></td>
141+
<td><p>Distance weighting method for mixed precision (low priority)</p></td>
142+
</tr>
143+
<tr class="row-odd"><td><p>num_of_images</p></td>
128144
<td><p>5</p></td>
129145
<td><p>Number of images for mixed precision</p></td>
130146
</tr>
131-
<tr class="row-odd"><td><p>use_hessian_based_scores</p></td>
147+
<tr class="row-even"><td><p>use_hessian_based_scores</p></td>
132148
<td><p>False</p></td>
133-
<td><p>Use Hessian-based scores for mixed precision</p></td>
149+
<td><p>Use Hessian-based scores for mixed precision (low priority)</p></td>
134150
</tr>
135-
<tr class="row-even"><td><p>weights_compression_ratio</p></td>
136-
<td><p>None</p></td>
137-
<td><p>Weights compression ratio for resource util</p></td>
151+
<tr class="row-odd"><td><p>weights_compression_ratio</p></td>
152+
<td><p>0.75</p></td>
153+
<td><p>Weights compression ratio for resource utilization (0.0 to 1.0)</p></td>
138154
</tr>
139-
<tr class="row-odd"><td><p>save_model_path</p></td>
155+
<tr class="row-even"><td><p>save_model_path</p></td>
140156
<td><p>‘./qmodel.keras’ / ‘./qmodel.onnx’</p></td>
141157
<td><p>Path to save quantized model (Keras/PyTorch)</p></td>
142158
</tr>
@@ -156,21 +172,37 @@ <h3>Navigation</h3>
156172
</tr>
157173
</thead>
158174
<tbody>
159-
<tr class="row-even"><td><p>target_platform_version</p></td>
160-
<td><p>‘v1’</p></td>
161-
<td><p>Target platform version (use_internal_tpc=True)</p></td>
175+
<tr class="row-even"><td><p>sdsp_version</p></td>
176+
<td><p>‘3.14’</p></td>
177+
<td><p>SDSP version for TPC</p></td>
178+
</tr>
179+
<tr class="row-odd"><td><p>activation_error_method</p></td>
180+
<td><p>mct.core.QuantizationErrorMethod.MSE</p></td>
181+
<td><p>Activation quantization error method (low priority)</p></td>
182+
</tr>
183+
<tr class="row-even"><td><p>weights_bias_correction</p></td>
184+
<td><p>True</p></td>
185+
<td><p>Enable weights bias correction (low priority)</p></td>
186+
</tr>
187+
<tr class="row-odd"><td><p>z_threshold</p></td>
188+
<td><p>float(‘inf’)</p></td>
189+
<td><p>Z-threshold for quantization (low priority)</p></td>
190+
</tr>
191+
<tr class="row-even"><td><p>linear_collapsing</p></td>
192+
<td><p>True</p></td>
193+
<td><p>Enable linear layer collapsing (low priority)</p></td>
162194
</tr>
163-
<tr class="row-odd"><td><p>tpc_version</p></td>
164-
<td><p>‘5.0’</p></td>
165-
<td><p>TPC version (use_internal_tpc=False)</p></td>
195+
<tr class="row-odd"><td><p>residual_collapsing</p></td>
196+
<td><p>True</p></td>
197+
<td><p>Enable residual connection collapsing (low priority)</p></td>
166198
</tr>
167199
<tr class="row-even"><td><p>n_epochs</p></td>
168200
<td><p>5</p></td>
169201
<td><p>Number of training epochs for GPTQ</p></td>
170202
</tr>
171203
<tr class="row-odd"><td><p>optimizer</p></td>
172-
<td><p>None</p></td>
173-
<td><p>Optimizer for GPTQ training</p></td>
204+
<td><p>default of <a class="reference external" href="https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_keras_gptq_config.html#model_compression_toolkit.gptq.get_keras_gptq_config">get_keras_gptq_config</a> or <a class="reference external" href="https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_pytroch_gptq_config.html#model_compression_toolkit.gptq.get_pytorch_gptq_config">get_pytorch_gptq_config</a></p></td>
205+
<td><p>Optimizer for GPTQ training (low priority)</p></td>
174206
</tr>
175207
<tr class="row-even"><td><p>save_model_path</p></td>
176208
<td><p>‘./qmodel.keras’ / ‘./qmodel.onnx’</p></td>
@@ -192,43 +224,63 @@ <h3>Navigation</h3>
192224
</tr>
193225
</thead>
194226
<tbody>
195-
<tr class="row-even"><td><p>target_platform_version</p></td>
196-
<td><p>v1</p></td>
197-
<td><p>Target platform version (use_internal_tpc=True)</p></td>
227+
<tr class="row-even"><td><p>sdsp_version</p></td>
228+
<td><p>‘3.14’</p></td>
229+
<td><p>SDSP version for TPC</p></td>
198230
</tr>
199-
<tr class="row-odd"><td><p>tpc_version</p></td>
200-
<td><p>‘5.0’</p></td>
201-
<td><p>TPC version (use_internal_tpc=False)</p></td>
231+
<tr class="row-odd"><td><p>activation_error_method</p></td>
232+
<td><p>mct.core.QuantizationErrorMethod.MSE</p></td>
233+
<td><p>Activation quantization error method (low priority)</p></td>
202234
</tr>
203-
<tr class="row-even"><td><p>n_epochs</p></td>
235+
<tr class="row-even"><td><p>weights_bias_correction</p></td>
236+
<td><p>True</p></td>
237+
<td><p>Enable weights bias correction (low priority)</p></td>
238+
</tr>
239+
<tr class="row-odd"><td><p>z_threshold</p></td>
240+
<td><p>float(‘inf’)</p></td>
241+
<td><p>Z-threshold for quantization (low priority)</p></td>
242+
</tr>
243+
<tr class="row-even"><td><p>linear_collapsing</p></td>
244+
<td><p>True</p></td>
245+
<td><p>Enable linear layer collapsing (low priority)</p></td>
246+
</tr>
247+
<tr class="row-odd"><td><p>residual_collapsing</p></td>
248+
<td><p>True</p></td>
249+
<td><p>Enable residual connection collapsing (low priority)</p></td>
250+
</tr>
251+
<tr class="row-even"><td><p>weights_compression_ratio</p></td>
252+
<td><p>0.75</p></td>
253+
<td><p>Weights compression ratio for resource utilization (0.0 to 1.0)</p></td>
254+
</tr>
255+
<tr class="row-odd"><td><p>n_epochs</p></td>
204256
<td><p>5</p></td>
205257
<td><p>Number of training epochs for GPTQ</p></td>
206258
</tr>
207-
<tr class="row-odd"><td><p>optimizer</p></td>
208-
<td><p>None</p></td>
209-
<td><p>Optimizer for GPTQ training</p></td>
259+
<tr class="row-even"><td><p>optimizer</p></td>
260+
<td><p>default of <a class="reference external" href="https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_keras_gptq_config.html#model_compression_toolkit.gptq.get_keras_gptq_config">get_keras_gptq_config</a> or <a class="reference external" href="https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/methods/get_pytroch_gptq_config.html#model_compression_toolkit.gptq.get_pytorch_gptq_config">get_pytorch_gptq_config</a></p></td>
261+
<td><p>Optimizer for GPTQ training (low priority)</p></td>
262+
</tr>
263+
<tr class="row-odd"><td><p>distance_weighting_method</p></td>
264+
<td><p>See <a class="reference external" href="https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/classes/MixedPrecisionQuantizationConfig.html">MixedPrecisionQuantizationConfig</a></p></td>
265+
<td><p>Distance weighting method for mixed precision (low priority)</p></td>
210266
</tr>
211267
<tr class="row-even"><td><p>num_of_images</p></td>
212268
<td><p>5</p></td>
213269
<td><p>Number of images for mixed precision</p></td>
214270
</tr>
215271
<tr class="row-odd"><td><p>use_hessian_based_scores</p></td>
216272
<td><p>False</p></td>
217-
<td><p>Use Hessian-based scores for mixed precision</p></td>
273+
<td><p>Use Hessian-based scores for mixed precision (low priority)</p></td>
218274
</tr>
219-
<tr class="row-even"><td><p>weights_compression_ratio</p></td>
220-
<td><p>None</p></td>
221-
<td><p>Weights compression ratio for resource util</p></td>
222-
</tr>
223-
<tr class="row-odd"><td><p>save_model_path</p></td>
275+
<tr class="row-even"><td><p>save_model_path</p></td>
224276
<td><p>‘./qmodel.keras’ / ‘./qmodel.onnx’</p></td>
225277
<td><p>Path to save quantized model (Keras/PyTorch)</p></td>
226278
</tr>
227279
</tbody>
228280
</table>
229281
<dl class="py method">
230282
<dt class="sig sig-object py" id="model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper.quantize_and_export">
231-
<span class="sig-name descname"><span class="pre">quantize_and_export</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">float_model</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">representative_dataset</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'PTQ'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">framework</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'pytorch'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_internal_tpc</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_mixed_precision</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">param_items</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper.quantize_and_export" title="Link to this definition"></a></dt>
283+
<span class="sig-name descname"><span class="pre">quantize_and_export</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">float_model</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">representative_dataset</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">framework</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'pytorch'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'PTQ'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_mixed_precision</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">param_items</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper.quantize_and_export" title="Link to this definition"></a></dt>
232284
<dd><p>Main function to perform model quantization and export.</p>
233285
<dl class="field-list simple">
234286
<dt class="field-odd">Return type<span class="colon">:</span></dt>
@@ -238,12 +290,10 @@ <h3>Navigation</h3>
238290
<dd class="field-even"><ul class="simple">
239291
<li><p><strong>float_model</strong> – The float model to be quantized.</p></li>
240292
<li><p><strong>representative_dataset</strong> (<em>Callable</em><em>, </em><em>np.array</em><em>, </em><em>tf.Tensor</em>) – Representative dataset for calibration.</p></li>
241-
<li><p><strong>method</strong> (<em>str</em>) – Quantization method, e.g., ‘PTQ’ or ‘GPTQ’.
242-
Default: ‘PTQ’</p></li>
243293
<li><p><strong>framework</strong> (<em>str</em>) – ‘tensorflow’ or ‘pytorch’.
244294
Default: ‘pytorch’</p></li>
245-
<li><p><strong>use_internal_tpc</strong> (<em>bool</em>) – Whether to use internal_tpc.
246-
Default: True</p></li>
295+
<li><p><strong>method</strong> (<em>str</em>) – Quantization method, e.g., ‘PTQ’ or ‘GPTQ’.
296+
Default: ‘PTQ’</p></li>
247297
<li><p><strong>use_mixed_precision</strong> (<em>bool</em>) – Whether to use mixed-precision
248298
quantization. Default: False</p></li>
249299
<li><p><strong>param_items</strong> (<em>list</em>) – List of parameter settings.
@@ -268,10 +318,9 @@ <h3>Navigation</h3>
268318
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">wrapper</span> <span class="o">=</span> <span class="n">mct</span><span class="o">.</span><span class="n">MCTWrapper</span><span class="p">()</span>
269319
</pre></div>
270320
</div>
271-
<p>set method, framework, and other parameters</p>
272-
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">method</span> <span class="o">=</span> <span class="s1">&#39;PTQ&#39;</span>
273-
<span class="gp">&gt;&gt;&gt; </span><span class="n">framework</span> <span class="o">=</span> <span class="s1">&#39;tensorflow&#39;</span>
274-
<span class="gp">&gt;&gt;&gt; </span><span class="n">use_internal_tpc</span> <span class="o">=</span> <span class="kc">True</span>
321+
<p>set framework, method, and other parameters</p>
322+
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">framework</span> <span class="o">=</span> <span class="s1">&#39;tensorflow&#39;</span>
323+
<span class="gp">&gt;&gt;&gt; </span><span class="n">method</span> <span class="o">=</span> <span class="s1">&#39;PTQ&#39;</span>
275324
<span class="gp">&gt;&gt;&gt; </span><span class="n">use_mixed_precision</span> <span class="o">=</span> <span class="kc">False</span>
276325
</pre></div>
277326
</div>
@@ -283,9 +332,8 @@ <h3>Navigation</h3>
283332
<div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">flag</span><span class="p">,</span> <span class="n">quantized_model</span> <span class="o">=</span> <span class="n">wrapper</span><span class="o">.</span><span class="n">quantize_and_export</span><span class="p">(</span>
284333
<span class="gp">... </span> <span class="n">float_model</span><span class="o">=</span><span class="n">float_model</span><span class="p">,</span>
285334
<span class="gp">... </span> <span class="n">representative_dataset</span><span class="o">=</span><span class="n">representative_dataset</span><span class="p">,</span>
286-
<span class="gp">... </span> <span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">,</span>
287335
<span class="gp">... </span> <span class="n">framework</span><span class="o">=</span><span class="n">framework</span><span class="p">,</span>
288-
<span class="gp">... </span> <span class="n">use_internal_tpc</span><span class="o">=</span><span class="n">use_internal_tpc</span><span class="p">,</span>
336+
<span class="gp">... </span> <span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">,</span>
289337
<span class="gp">... </span> <span class="n">use_mixed_precision</span><span class="o">=</span><span class="n">use_mixed_precision</span><span class="p">,</span>
290338
<span class="gp">... </span> <span class="n">param_items</span><span class="o">=</span><span class="n">param_items</span>
291339
<span class="gp">... </span><span class="p">)</span>

docs/searchindex.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)