1+ /*
2+ * Copyright (c) 2025 The ggml authors
3+ *
4+ * Qualcomm Hexagon SDK and reference tech guides can be found at:
5+ * https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
6+ *
7+ * This self-contained, single-source file is the implementation of ggml-dsp:
8+ * - a customized tiny ggml running on Qualcomm Hexagon cDSP
9+ * - ported from original ggml
10+ *
11+ * Permission is hereby granted, free of charge, to any person obtaining a copy
12+ * of this software and associated documentation files (the "Software"), to
13+ * deal in the Software without restriction, including without limitation the
14+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
15+ * sell copies of the Software, and to permit persons to whom the Software is
16+ * furnished to do so, subject to the following conditions:
17+ *
18+ * The above copyright notice and this permission notice shall be included in
19+ * all copies or substantial portions of the Software.
20+ *
21+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27+ * IN THE SOFTWARE.
28+ */
129#include "ggml-dsp.h"
230
3- // =================================================================================================
4- // tiny ggml-dsp, ported from original ggml
5- // =================================================================================================
6- static int32 g_thread_counts = 1 ;
7-
831void ggmlhexagon_log_internal (int level , const char * file , const char * func , int line , const char * format , ...) {
932#if !GGMLHEXAGON_DEBUG
1033 return ;
@@ -30,7 +53,7 @@ void ggmlhexagon_dump_tensor_elements(const ggml_tensor * tensor) {
3053 char tmpbuf [GGMLHEXAGON_LOGBUF_LEN ];
3154 size_t buflen = 0 ;
3255 if (tensor -> type == GGML_TYPE_F32 ) {
33- memset (tmpbuf , 0 , GGMLHEXAGON_LOG_LEVEL_DEBUG );
56+ memset (tmpbuf , 0 , GGMLHEXAGON_LOGBUF_LEN );
3457 for (int h = 0 ; h < tensor -> ne [3 ]; h ++ ) {
3558 for (int i = 0 ; i < tensor -> ne [2 ]; i ++ ) {
3659 for (int j = 0 ; j < tensor -> ne [1 ]; j ++ ) {
@@ -173,116 +196,3 @@ int64_t ggml_time_ms(void) {
/*
 * Returns the value of hexagon_perf_get_time_us() as an int64_t
 * (microseconds, going by the helper's name).
 */
int64_t ggml_time_us(void) {
    const int64_t now_us = hexagon_perf_get_time_us();
    return now_us;
}
176-
177- int ggmlop_get_thread_counts (void ) {
178- return g_thread_counts ;
179- }
180-
181- // =================================================================================================
182- // implementation of ggml-hexagon kernel skel function
183- // =================================================================================================
184- int ggmlop_dsp_open (const char * uri , remote_handle64 * handle ) {
185- void * tptr = NULL ;
186- GGMLHEXAGON_LOG_DEBUG ("uri %s" , uri );
187- tptr = (void * )malloc (1 );
188- * handle = (remote_handle64 )tptr ;
189- assert (* handle );
190-
191- GGMLHEXAGON_LOG_DEBUG ("api_version = 0x%x" , qurt_api_version ());
192- GGMLHEXAGON_LOG_DEBUG ("hvx units = 0x%d" , qurt_hvx_get_units ());
193- qurt_arch_version_t vers ;
194- qurt_sysenv_get_arch_version (& vers );
195- GGMLHEXAGON_LOG_DEBUG ("arch_version=0x%x" , vers .arch_version );
196- qurt_sysenv_app_heap_t aheap ;
197- qurt_sysenv_get_app_heap (& aheap );
198- GGMLHEXAGON_LOG_DEBUG ("aheap.heap_base=0x%x, aheap.heap_limit=0x%x" , aheap .heap_base , aheap .heap_limit );
199- qurt_sysenv_max_hthreads_t mhwt ;
200- qurt_sysenv_get_max_hw_threads (& mhwt );
201- GGMLHEXAGON_LOG_DEBUG ("max hardware threads counts=%d" , mhwt .max_hthreads );
202- g_thread_counts = mhwt .max_hthreads ;
203-
204- return 0 ;
205- }
206-
207- int ggmlop_dsp_close (remote_handle64 handle ) {
208- if (handle )
209- free ((void * )handle );
210-
211- return 0 ;
212- }
213-
214- AEEResult ggmlop_dsp_setclocks (remote_handle64 handle , int32 power_level , int32 latency , int32 dcvs_enabled , int32 thread_counts ) {
215- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
216- HAP_power_request_t request ;
217- memset (& request , 0 , sizeof (HAP_power_request_t ));
218- request .type = HAP_power_set_apptype ;
219- request .apptype = HAP_POWER_COMPUTE_CLIENT_CLASS ;
220-
221- GGMLHEXAGON_LOG_DEBUG ("user specified thread_counts %d" , thread_counts );
222- if (thread_counts > 1 )
223- g_thread_counts = (thread_counts > g_thread_counts ) ? g_thread_counts : thread_counts ;
224- else
225- g_thread_counts = 1 ;
226- GGMLHEXAGON_LOG_DEBUG ("real thread_counts %d" , g_thread_counts );
227-
228- void * ggmop_ctx = (void * )(handle );
229- int retval = HAP_power_set (ggmop_ctx , & request );
230- if (retval ) {
231- GGMLHEXAGON_LOG_DEBUG ("failed first power vote" );
232- return AEE_EFAILED ;
233- }
234-
235- //configure clocks & DCVS mode
236- memset (& request , 0 , sizeof (HAP_power_request_t ));
237- request .type = HAP_power_set_DCVS_v2 ;
238- request .dcvs_v2 .dcvs_enable = TRUE;
239- request .dcvs_v2 .dcvs_params .target_corner = (HAP_dcvs_voltage_corner_t )power_level ;
240- if (dcvs_enabled ) {
241- request .dcvs_v2 .dcvs_params .min_corner = HAP_DCVS_VCORNER_DISABLE ;
242- request .dcvs_v2 .dcvs_params .max_corner = HAP_DCVS_VCORNER_DISABLE ;
243- } else {
244- request .dcvs_v2 .dcvs_params .min_corner = request .dcvs_v2 .dcvs_params .target_corner ;
245- request .dcvs_v2 .dcvs_params .max_corner = request .dcvs_v2 .dcvs_params .target_corner ;
246- }
247- request .dcvs_v2 .dcvs_option = HAP_DCVS_V2_PERFORMANCE_MODE ;
248- request .dcvs_v2 .set_dcvs_params = TRUE;
249- request .dcvs_v2 .set_latency = TRUE;
250- request .dcvs_v2 .latency = latency ;
251- retval = HAP_power_set (ggmop_ctx , & request );
252- if (retval ) {
253- GGMLHEXAGON_LOG_DEBUG ("failed to vote for performance mode" );
254- return AEE_EFAILED ;
255- }
256-
257- memset (& request , 0 , sizeof (HAP_power_request_t ));
258- request .type = HAP_power_set_HVX ;
259- request .hvx .power_up = TRUE;
260- retval = HAP_power_set (ggmop_ctx , & request );
261- if (retval ) {
262- GGMLHEXAGON_LOG_DEBUG ("failed to vote for HVX power" );
263- return AEE_EFAILED ;
264- }
265- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
266- return AEE_SUCCESS ;
267- }
268-
269- // =================================================================================================
270- // implementation of ggml-hexagon kernel, it's better to put every hexagon-kernel to a single file
271- // =================================================================================================
272- int ggmlop_dsp_softmax (remote_handle64 h , const dsptensor * src0 , const dsptensor * src1 , dsptensor * dst ) {
273- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
274- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
275- return 0 ;
276- }
277-
278- int ggmlop_dsp_rmsnorm (remote_handle64 h , const dsptensor * src0 , const dsptensor * src1 , dsptensor * dst ) {
279- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
280- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
281- return 0 ;
282- }
283-
284- int ggmlop_dsp_pool2d (remote_handle64 h , const dsptensor * src0 , const dsptensor * src1 , dsptensor * dst ) {
285- GGMLHEXAGON_LOG_DEBUG ("enter %s" , __func__ );
286- GGMLHEXAGON_LOG_DEBUG ("leave %s" , __func__ );
287- return 0 ;
288- }
0 commit comments