Skip to content

Commit f5a892a

Browse files
committed
project: add code for developers'/experts' work on the cDSP side
1 parent 047b200 commit f5a892a

File tree

7 files changed

+964
-7
lines changed

7 files changed

+964
-7
lines changed

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ function(ggml_hexagon_build_kernel KNAME)
113113
COMMAND make -C ${CMAKE_CURRENT_LIST_DIR}/kernels/ clean
114114
COMMAND make -C ${CMAKE_CURRENT_LIST_DIR}/kernels/ HEXAGON_SDK_PATH=${HEXAGON_SDK_PATH} HTP_ARCH_VERSION=${HTP_ARCH_VERSION} DEBUG_FLAG=${DEBUG_FLAG}
115115
COMMAND echo "current working path:`pwd`\n"
116-
COMMAND ls -l ../../../bin/libggmlop-skel.so
116+
COMMAND ls -l ../../../bin/libggmldsp-skel.so
117117
COMMENT "build hexagon-kernel"
118118
)
119119
endfunction()

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,7 +1323,7 @@ class hexagon_perf {
13231323
if (g_hexagon_profiler.profiler_get_frame_index() <= g_hexagon_profiler.profiler_get_threshold_count()) {
13241324
const char * devname = ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend);
13251325
if (g_hexagon_appcfg.hexagon_backend != HEXAGON_BACKEND_GGML) {
1326-
//add this check for a special scenario: a invalid value passed from user's program
1326+
//add this check for a special scenario: an invalid value passed from user's program
13271327
if (0 != memcmp(devname, "unknown", strlen("unknown"))) {
13281328
devname += 16;
13291329
}
@@ -6426,7 +6426,7 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
64266426
dev_index = (int)(intptr_t)params;
64276427
if (dev_index < 0) {
64286428
GGMLHEXAGON_LOG_VERBOSE("it shouldn't happend\n");
6429-
//test-thread-safety might-be running at the moment or a invalid value passed from user's program
6429+
//test-thread-safety might-be running at the moment or an invalid value passed from user's program
64306430
dev_index = 0;
64316431
}
64326432
g_hexagon_appcfg.hexagon_backend = dev_index;

ggml/src/ggml-hexagon/kernels/Makefile

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ HEXAGON_COMPUTE=compute${HTP_ARCH_VERSION}
77
HEXAGON_CC=${HEXAGON_SDK_PATH}/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-clang
88
HEXAGON_CXX=${HEXAGON_SDK_PATH}/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-clang
99

10-
TARGET=libggmlop-skel.so
10+
TARGET=libggmldsp-skel.so
1111

1212
$(info HEXAGON_SDK_PATH:${HEXAGON_SDK_PATH})
1313
$(info HTP_ARCH_VERSION:${HTP_ARCH_VERSION})
@@ -23,18 +23,31 @@ LDFLAGS=-m${HTP_ARCH_VERSION} -Wl,--defsym=ISDB_TRUSTED_FLAG=2 -Wl,--defsym=ISDB
2323
#SRCS = $(wildcard *.c)
2424
SRCS = ggml-dsp.c skel.c entry.c add.c mulmat.c
2525
OBJS = $(patsubst %.c, %.o, $(SRCS))
26+
OBJS += dot.o
27+
OBJS += worker_pool.o
2628

2729
ALL:$(OBJS)
2830
${HEXAGON_CC} ${LDFLAGS} -o ${TARGET} -Wl,--start-group ${OBJS} -Wl,--end-group
2931
@ls -l ${TARGET}
3032
/bin/cp -fv ${TARGET} ../../../../out/android/bin/
31-
/bin/cp -fv ${TARGET} ../../../../out/android/bin/libggmlop-skel${HTP_ARCH_VERSION}.so
33+
/bin/cp -fv ${TARGET} ../../../../out/android/bin/libggmldsp-skel${HTP_ARCH_VERSION}.so
3234
/bin/rm -f *.so
3335

3436
%.o:%.c
3537
@echo "${HEXAGON_CC} ${CFLAGS} ${DEBUG_FLAG} -D__FILENAME__=\"$<\" -o $@ -c $<"
3638
${HEXAGON_CC} ${CFLAGS} ${DEBUG_FLAG} -D__FILENAME__=\"$<\" -o $@ -c $<
3739
@echo "\n"
3840

41+
%.o:%.S
42+
@echo "${HEXAGON_CC} ${CFLAGS} ${DEBUG_FLAG} -D__FILENAME__=\"$<\" -o $@ -c $<"
43+
${HEXAGON_CC} ${CFLAGS} ${DEBUG_FLAG} -D__FILENAME__=\"$<\" -o $@ -c $<
44+
@echo "\n"
45+
46+
%.o:%.cpp
47+
@echo "${HEXAGON_CC} ${CFLAGS} ${DEBUG_FLAG} -D__FILENAME__=\"$<\" -o $@ -c $<"
48+
${HEXAGON_CC} ${CFLAGS} ${DEBUG_FLAG} -D__FILENAME__=\"$<\" -o $@ -c $<
49+
@echo "\n"
50+
3951
clean:
4052
rm -f *.o
53+
/bin/rm -f *.so
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/**=============================================================================
2+
@file
3+
qhblas_f_vector_dot_af.S
4+
5+
@brief
6+
Calculates dot product of two input float vectors.
7+
8+
Function prototype
9+
10+
int32_t qhblas_f_vector_dot_af(float_a8_t *input_1, float_a8_t *input_2, float *output, uint32_t size);
11+
12+
Reference C code
13+
14+
int32_t qhblas_f_vector_dot_af(float_a8_t *input_1, float_a8_t *input_2, float *output, uint32_t size)
15+
{
16+
if ((input_1 == NULL) || (input_2 == NULL) || (output == NULL) || (size == 0))
17+
{
18+
return -1;
19+
}
20+
21+
float dot = 0;
22+
for (uint32_t i = 0; i < size; ++i)
23+
{
24+
dot += input_1[i] * input_2[i];
25+
}
26+
27+
*output = dot;
28+
return 0;
29+
}
30+
31+
Copyright (c) 2019 Qualcomm Technologies Incorporated.
32+
All Rights Reserved. Qualcomm Proprietary and Confidential.
33+
=============================================================================**/
34+
35+
/*============================================================================*/
36+
37+
.p2align 2
38+
.p2align 4,,15
39+
.global qhblas_f_vector_dot_af
40+
.type qhblas_f_vector_dot_af, @function
41+
42+
/*============================================================================*/
43+
44+
#define DC_PREFETCH_AHEAD 64 // byte offset ahead of the current input pointer passed to dcfetch
45+
#define L2_PREFETCH_AHEAD 256 // byte offset ahead of the current input pointer used as the l2fetch address
46+
#define L2FETCH_CONFIG 0x0100FF00+(L2_PREFETCH_AHEAD/256) // [stride = 256 : width = 255 : height = bytes/256]; NOTE(review): expansion is not parenthesized, only safe as a whole immediate
47+
#define L2_PREFETCH_ELEMS L2_PREFETCH_AHEAD/8 // L2_PREFETCH_AHEAD in 8-byte units (one memd = two floats); compared against the size/2 counter. NOTE(review): expansion is not parenthesized
48+
49+
/*============================================================================*/
50+
51+
/*
 * qhblas_f_vector_dot_af: dot product of two float vectors.
 *
 * Register use, as read from the code below:
 *   r0 = input_1, r1 = input_2, r2 = output, r3 = size
 *   r10 = running sum, stored to *output at .L_ret when p0 is set
 *   p0  = argument-validity predicate, p1 = "size is odd" predicate
 *   returns r0 = 0 when p0 is set, -1 otherwise
 *
 * The main loop consumes two floats from each input per iteration (8-byte
 * memd loads); a single trailing float is handled at .L_do_one.
 */
qhblas_f_vector_dot_af:
52+
{
53+
p0 = !cmp.eq(r0,#0) // input_1 != NULL
54+
p0 = !cmp.eq(r1,#0) // input_2 != NULL
55+
p0 = !cmp.eq(r2,#0) // output != NULL
56+
p0 = cmp.gtu(r3,#0) // size > 0
57+
if (!p0.new) jump:nt .L_ret // NOTE(review): relies on the four same-packet p0 writes being combined (logical AND) - confirm against the Hexagon PRM
58+
}
59+
{
60+
r10 = #0 // clear the running sum
61+
r3 = lsr(r3,#1) // r3 = size / 2 = number of two-float iterations
62+
p1 = tstbit(r3,#0) // check for odd size; NOTE(review): assumes this reads r3 before the shift in the same packet - confirm
63+
if(cmp.eq(r3.new,#0)) jump:nt .L_do_one // size / 2 == 0: skip the loop and handle the single element
64+
}
65+
{
66+
r7:6 = #0 // zero the load registers so the first in-loop sfmpy(r7,r9) adds 0
67+
r9:8 = #0
68+
r5 = add(r3,#7) // (size / 2) + 7
69+
p2 = cmp.gtu(r3,#L2_PREFETCH_ELEMS) // check whether we can do l2fetch
70+
}
71+
{
72+
r5 = lsr(r5,#3) // ceil((size / 2) / 8): outer loop trip count
73+
r14 = mux(p2,r3,#0) // set l2fetch counter: value of r3 at which the next l2fetch is issued, 0 = none
74+
}
75+
{
76+
r13:12 = combine(##L2FETCH_CONFIG,#8) // set l2fetch config and max number of iterations for .L_loop_do_two
77+
loop1(.L_prefetch_loop_do_two,r5)
78+
}
79+
.falign
80+
.L_prefetch_loop_do_two:
81+
{
82+
dcfetch(r0+#DC_PREFETCH_AHEAD) // prefetch ahead for input_1
83+
r5 = min(r12,r3) // min(8, remaining size / 2): inner loop trip count
84+
}
85+
{
86+
dcfetch(r1+#DC_PREFETCH_AHEAD) // prefetch ahead for input_2
87+
loop0(.L_loop_do_two,r5)
88+
p2 = cmp.eq(r3,r14) // check whether to do l2fetch
89+
if (!p2.new) jump:t .L_loop_do_two // no l2fetch due: go straight to the inner loop
90+
}
91+
{
92+
r5 = add(r3,#-L2_PREFETCH_ELEMS) // number of elements left to prefetch ahead
93+
r15 = add(r0,#L2_PREFETCH_AHEAD) // input_1 addr for l2fetch
94+
}
95+
{
96+
p2 = cmp.gtu(r5,#L2_PREFETCH_ELEMS) // check whether we can continue to do l2fetch
97+
r15 = add(r1,#L2_PREFETCH_AHEAD) // input_2 addr for l2fetch
98+
l2fetch(r15,r13) // NOTE(review): presumably uses the input_1 address computed in the previous packet - confirm packet read semantics
99+
}
100+
{
101+
if (p2) r14 = add(r14,#-L2_PREFETCH_ELEMS) // adjust l2fetch counter
102+
if (!p2) r14 = #0 // there are no more bytes left to prefetch ahead
103+
l2fetch(r15,r13) // l2fetch for input_2
104+
}
105+
.falign
106+
.L_loop_do_two:
107+
{
108+
r7:6 = memd(r0++#8) // load two floats from input_1, advance by 8 bytes
109+
r9:8 = memd(r1++#8) // load two floats from input_2, advance by 8 bytes
110+
r10 += sfmpy(r7,r9) // NOTE(review): presumably accumulates the r7*r9 pair loaded by the previous iteration (zero on the first) - confirm packet read semantics
111+
}
112+
{
113+
r10 += sfmpy(r6,r8) // accumulate the r6*r8 pair
114+
r3 = add(r3,#-1) // adjust (size / 2)
115+
}:endloop0:endloop1
116+
{
117+
r10 += sfmpy(r7,r9) // add the r7*r9 product; NOTE(review): presumably the pair left pending by the last loop iteration - confirm
118+
if (!p1) jump:nt .L_ret // even size: nothing left to process
119+
}
120+
.falign
121+
.L_do_one:
122+
{
123+
r4 = memw(r0) // single remaining float of input_1 (odd size, or size == 1)
124+
r5 = memw(r1) // single remaining float of input_2
125+
}
126+
{
127+
r10 += sfmpy(r4,r5)
128+
}
129+
.falign
130+
.L_ret:
131+
{
132+
if (p0) memw(r2) = r10 // write the result only when the arguments were valid
133+
r0 = mux(p0,#0,#-1) // return 0 on success, -1 on invalid arguments
134+
jumpr r31
135+
}
136+
.size qhblas_f_vector_dot_af, .-qhblas_f_vector_dot_af

0 commit comments

Comments
 (0)