|
| 1 | +[general] |
| 2 | +#version of ggml-hexagon.cpp on ARM-AP side |
| 3 | +version = "1.08" |
| 4 | +#version of ggml-dsp.c on cDSP side |
| 5 | +ggmldsp_version = "0.96" |
| 6 | + |
| 7 | +#0: HEXAGON_BACKEND_QNNCPU |
| 8 | +#1: HEXAGON_BACKEND_QNNGPU |
| 9 | +#2: HEXAGON_BACKEND_QNNNPU |
| 10 | +#3: HEXAGON_BACKEND_CDSP |
| 11 | +#4: default ggml backend |
| 12 | +hexagon_backend = 3 |
| 13 | +# 0: hwaccel approach through HWACCEL_QNN: offload ggml op to QNN |
| 14 | +# 1: hwaccel approach through HWACCEL_QNN_SINGLEGRAPH: mapping entire ggml cgraph to a single QNN graph |
| 15 | +# 2: hwaccel approach through HWACCEL_CDSP: offload ggml op to cDSP directly |
| 16 | +hwaccel_approach = 2 |
| 17 | +# |
| 18 | +#attention: |
| 19 | +# a. HWACCEL_QNN_SINGLEGRAPH not supported at the moment; |
| 20 | +# b. following combinations are valid: |
| 21 | +# 1: hwaccel_approach = 2 AND hexagon_backend = 3(HWACCEL_CDSP, this is the default setting) |
| 22 | +# 2: hwaccel_approach = 0 AND hexagon_backend = 2(QNNNPU) |
| 23 | +# 3: hwaccel_approach = 0 AND hexagon_backend = 1(QNNGPU) |
| 24 | +# 4: hwaccel_approach = 0 AND hexagon_backend = 0(QNNCPU) |
| 25 | +# 5: hwaccel_approach = 2 AND hexagon_backend = 4(fall back to the default ggml backend) |
| 26 | +# 6: hwaccel_approach = 0 AND hexagon_backend = 4(fall back to the default ggml backend) |
| 27 | +# |
| 28 | +#generally speaking, |
| 29 | +# a. we only need to focus on b-1(HWACCEL_CDSP) and b-2(QNNNPU). |
| 30 | +# b. we can compare Hexagon NPU performance between HWACCEL_CDSP/QNNNPU/the default ggml backend accordingly |
| 31 | + |
| 32 | + |
| 33 | +#enable/disable offload quantized type mulmat |
| 34 | +#quantized type mulmat works fine through QNNNPU at the moment |
| 35 | +#quantized type mulmat doesn't work well through HWACCEL_CDSP at the moment |
| 36 | +#this item makes mulmat performance comparison easier |
| 37 | +enable_q_mulmat = 1 |
| 38 | + |
| 39 | + |
| 40 | +# enable/disable print tensors info in op function |
| 41 | +print_tensors_info = 0 |
| 42 | +# enable/disable dump op info in handle_op |
| 43 | +dump_op_info = 0 |
| 44 | + |
| 45 | + |
| 46 | +# enable/disable perf of op function |
| 47 | +# this is the default setting |
| 48 | +enable_perf = 1 |
| 49 | + |
| 50 | + |
| 51 | +# enable/disable profiler feature to visually compare NPU performance between HWACCEL_CDSP and QNNNPU |
| 52 | +# this is the default setting |
| 53 | +enable_profiler = 0 |
| 54 | +#threshold duration of NPU performance profiler, in seconds |
| 55 | +profiler_duration = 5 |
| 56 | +#threshold counts of NPU performance profiler |
| 57 | +profiler_counts = 200 |
| 58 | +#attention: |
| 59 | +# NPU performance might be slower when enable_profiler = 1 because of file I/O in this feature; |
| 60 | +# ensure enable_perf = 1 when setting enable_profiler = 1; |
| 61 | + |
| 62 | + |
| 63 | +#enable/disable pinned-memory feature |
| 64 | +enable_pinned_memory = 0 |
| 65 | + |
| 66 | +#hwaccel approach through QNN(offload ggml op to QNN-NPU) |
| 67 | +[qnn] |
| 68 | +# enable/disable QNN SDK's internal log, this will be very helpful for troubleshooting in HWACCEL_QNN approach |
| 69 | +print_qnn_internal_log = 0 |
| 70 | + |
| 71 | +hvx_threads = 8 |
| 72 | +vtcm_size_in_mb = 8 |
| 73 | +enable_dlbc = 1 |
| 74 | +precision_mode = "fp16" |
| 75 | + |
| 76 | + |
| 77 | +#hwaccel approach through cDSP(offload ggml op to Hexagon cDSP directly) |
| 78 | +[cdsp] |
| 79 | +#enable/disable rpc ion memory pool |
| 80 | +enable_rpc_ion_mempool = 1 |
| 81 | + |
| 82 | +#enable/disable offload all quantized type mulmat to cDSP |
| 83 | +enable_all_q_mulmat = 1 |
| 84 | +#attention: |
| 85 | +#ensure enable_q_mulmat = 1 when setting enable_all_q_mulmat = 1 |
| 86 | + |
| 87 | +#enable/disable multi-threading on cDSP side |
| 88 | +# 0 disable multi-threading on cDSP side |
| 89 | +# 1 disable multi-threading on cDSP side |
| 90 | +# 2-8 thread_counts on cDSP side |
| 91 | +thread_counts = 8 |
0 commit comments