Skip to content

Commit b67e656

Browse files
chenj, KexyBiscuit
authored and committed
AOSCOS: memset optimization for loongson-3
[Mingcong Bai: Resolved a minor conflict in arch/mips/loongson64/Makefile.] Signed-off-by: Mingcong Bai <[email protected]> Signed-off-by: chenj <[email protected]> Signed-off-by: Kexy Biscuit <[email protected]>
1 parent fc4fb8f commit b67e656

File tree

2 files changed

+207
-0
lines changed

2 files changed

+207
-0
lines changed

arch/mips/loongson64/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# Object files appended to the obj-$(CONFIG_MACH_LOONGSON64) list; the last entry is loongson3-memset.o.
obj-$(CONFIG_MACH_LOONGSON64) += cop2-ex.o dma.o \
66
setup.o init.o env.o time.o reset.o \
77
constant_timer.o ec_wpce775l.o platform.o \
8+
loongson3-memset.o \
89

910
obj-$(CONFIG_SMP) += smp.o
1011
obj-$(CONFIG_NUMA) += numa.o
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*
2+
* This file is subject to the terms and conditions of the GNU General Public
3+
* License. See the file "COPYING" in the main directory of this archive
4+
* for more details.
5+
*
6+
* Copyright (C) 1998, 1999, 2000 by Ralf Baechle
7+
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
8+
* Copyright (C) 2007 by Maciej W. Rozycki
9+
* Copyright (C) 2011, 2012 MIPS Technologies, Inc.
10+
*/
11+
#include <asm/asm.h>
12+
#include <asm/asm-offsets.h>
13+
#include <asm/export.h>
14+
#include <asm/regdef.h>
15+
16+
#define LONG_S_L sdl /* "store doubleword left": partial store used for the unaligned tail edge */
17+
#define LONG_S_R sdr /* "store doubleword right": partial store used for the unaligned head edge */
18+
19+
#define STORSIZE 16 /* bytes per EX_GSSQ store; f_fill128 steps its offsets by this amount */
20+
#define STORMASK 15 /* STORSIZE - 1: mask for the 16-byte alignment test and the sub-16-byte remainder */
21+
22+
/*
 * EX(insn, reg, addr, handler): emit "insn reg, addr" at local label 9 and
 * record the pair (address of that instruction, handler) in the __ex_table
 * section, then switch back to the previous section.  Presumably the kernel
 * fault path looks this table up to resume at handler when the instruction
 * faults - confirm against the exception-table code.
 */
#define EX(insn,reg,addr,handler) \
23+
9: insn reg, addr; \
24+
.section __ex_table,"a"; \
25+
PTR 9b, handler; \
26+
.previous
27+
28+
/*
 * EX_GSSQ(reg, addr, handler): like EX(), but for the Loongson gssq store.
 * The same register is passed for both source operands, so both halves of
 * the store carry the same value.  The instruction is assembled with
 * arch=loongson3a inside a .set push/.set pop pair so the surrounding
 * assembler options are left unchanged, and an __ex_table entry
 * (instruction address, handler) is recorded for it.
 */
#define EX_GSSQ(reg, addr, handler) \
29+
.set push; \
30+
.set arch=loongson3a; \
31+
9: gssq reg, reg, addr; \
32+
.set pop; \
33+
.section __ex_table,"a"; \
34+
PTR 9b, handler; \
35+
.previous
36+
37+
/*
 * f_fill128 dst, offset, val, fixup
 *
 * Emits eight EX_GSSQ stores of \val to \offset + k * STORSIZE (k = 0..7)
 * relative to register \dst, i.e. 8 * 16 = 128 bytes, each registered with
 * \fixup as its exception-table handler.
 *
 * .Lmemset_partial jumps into the middle of one expansion of this macro,
 * using a target computed back from the label that follows it, so the number
 * and order of the stores must not change.  That computation assumes each
 * store assembles to a single 4-byte instruction.
 */
.macro f_fill128 dst, offset, val, fixup
38+
EX_GSSQ(\val, (\offset + 0 * STORSIZE)(\dst), \fixup)
39+
EX_GSSQ(\val, (\offset + 1 * STORSIZE)(\dst), \fixup)
40+
EX_GSSQ(\val, (\offset + 2 * STORSIZE)(\dst), \fixup)
41+
EX_GSSQ(\val, (\offset + 3 * STORSIZE)(\dst), \fixup)
42+
EX_GSSQ(\val, (\offset + 4 * STORSIZE)(\dst), \fixup)
43+
EX_GSSQ(\val, (\offset + 5 * STORSIZE)(\dst), \fixup)
44+
EX_GSSQ(\val, (\offset + 6 * STORSIZE)(\dst), \fixup)
45+
EX_GSSQ(\val, (\offset + 7 * STORSIZE)(\dst), \fixup)
46+
.endm
47+
48+
/*
49+
* memset(void *s, int c, size_t n)
50+
*
51+
* a0: start of area to clear
52+
* a1: char to fill with
53+
* a2: size of area to clear
54+
*
* Returns the original a0 in v0.  __bzero is a second entry point placed
* after the fill-byte replication, so it uses a1 exactly as passed in.
* Every store is wrapped in EX()/EX_GSSQ(); a faulting store is redirected
* to one of the .L*_fixup handlers.  The success paths return with a2 = 0.
*
* Stages:
*  1. n < STORSIZE: handled byte-wise in .Lsmall_memset.
*  2. Unaligned head: sdr + sd cover the bytes up to the next 16-byte
*     boundary (possibly rewriting up to 8 bytes past it); a0/a2 are then
*     adjusted to that boundary.
*  3. Whole 128-byte blocks: loop over f_fill128.
*  4. Remaining 16-byte units: computed jump into a second f_fill128.
*  5. Tail of 1..15 bytes: sd + sdl ending at the last byte.
*
* Clobbers t0, t1 and AT in addition to the argument registers.
*/
55+
.set noreorder /* branch delay slots below are filled by hand */
56+
.align 5 /* 2^5 = 32-byte alignment for the entry point */
57+
LEAF(memset)
58+
EXPORT_SYMBOL(memset)
59+
beqz a1, 1f /* c == 0: nothing to replicate */
60+
move v0, a0 /* result (delay slot, always executed) */
61+
62+
andi a1, 0xff /* spread fillword: keep only the low byte ... */
63+
LONG_SLL t1, a1, 8
64+
or a1, t1 /* ... 2 copies */
65+
LONG_SLL t1, a1, 16
66+
or a1, t1 /* ... 4 copies */
67+
LONG_SLL t1, a1, 32
68+
or a1, t1 /* ... 8 copies: a1 holds the byte in every lane */
69+
1:
70+
71+
FEXPORT(__bzero)
72+
EXPORT_SYMBOL(__bzero)
73+
sltiu t0, a2, STORSIZE /* very small region? */
74+
bnez t0, .Lsmall_memset
75+
andi t0, a0, STORMASK /* aligned? (delay slot: t0 = a0 % 16) */
76+
77+
.set noat
78+
li AT, STORSIZE
79+
beqz t0, 1f /* already 16-byte aligned: skip the head */
80+
PTR_SUBU t0, AT /* alignment in bytes: t0 = (a0 % 16) - 16, i.e. -(head length) */
81+
.set at
82+
83+
EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword 8B aligned */
84+
.set push
85+
.set arch=mips64r2 /* dins needs the r2 ISA */
86+
PTR_ADDIU t1, a0, 8
87+
dins t1, zero, 0, 3 /* t1 = (a0 + 8) & ~7: next 8-byte boundary */
88+
.set pop
89+
EX(LONG_S, a1, (t1), .Lsecond_fixup) /* May double copy 8B */
90+
91+
PTR_SUBU a0, t0 /* long align ptr: a0 += head length */
92+
PTR_ADDU a2, t0 /* correct size: a2 -= head length */
93+
94+
1: ori t1, a2, 0x7f /* # of full blocks */
95+
xori t1, 0x7f /* t1 = a2 & ~0x7f: bytes in whole 128B blocks */
96+
beqz t1, .Lmemset_partial /* no block to fill */
97+
andi t0, a2, 0x80-STORSIZE /* delay slot: t0 = a2 & 0x70, bytes in whole 16B units below 128 */
98+
99+
PTR_ADDU t1, a0 /* end address */
100+
.set reorder /* no explicit delay slot after the bne below */
101+
1: PTR_ADDIU a0, 128
102+
f_fill128 a0, -128, a1, .Lfwd_fixup /* fill the 128 bytes just stepped over */
103+
bne t1, a0, 1b
104+
.set noreorder
105+
106+
.Lmemset_partial:
107+
PTR_LA t1, 2f /* where to start */
108+
.set noat
109+
LONG_SRL AT, t0, 2 /* bytes -> code offset: 16 data bytes per 4-byte store */
110+
PTR_SUBU t1, AT /* back up t0/16 stores from label 2 */
111+
.set at
112+
jr t1
113+
PTR_ADDU a0, t0 /* dest ptr (delay slot: a0 = end of the 16B-unit region) */
114+
115+
.set push
116+
.set noreorder
117+
.set nomacro
118+
f_fill128 a0, -128, a1, .Lpartial_fixup /* ... but first do 16Bs ... */
119+
2: .set pop
120+
andi a2, STORMASK /* At most 15B to go */
121+
122+
beqz a2, 1f
123+
PTR_ADDU a0, a2 /* What's left (delay slot: a0 = one past the last byte) */
124+
.set push
125+
.set arch=mips64r2
126+
PTR_ADDIU t1, a0, -8 /* t1 = end - 8; non-trapping add, matching the head's PTR_ADDIU */
127+
dins t1, zero, 0, 3 /* round t1 down to an 8-byte boundary */
128+
.set pop
129+
EX(LONG_S, a1, (t1), .Lnotlast_fixup) /* May double copy 8B */
130+
EX(LONG_S_L, a1, -1(a0), .Llast_fixup) /* partial store ending at the last byte */
131+
1: jr ra
132+
move a2, zero /* delay slot: a2 = 0 on success */
133+
134+
.Lsmall_memset: /* a2 < STORSIZE */
135+
andi t1, a2, 7
136+
beq t1, a2, 1f /* a2 < 8: byte stores only */
137+
LONG_SLL t1, 2 /* delay slot: t1 = (a2 & 7) * 4, code bytes for that many sb */
138+
139+
EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* a2 in 8..15: unaligned 8-byte store at a0 ... */
140+
EX(LONG_S_L, a1, 7(a0), .Lsmall_memset_fixup) /* ... completed by the matching left store */
141+
142+
1: PTR_LA t0, 2f
143+
PTR_SUBU t1, t0, t1 /* jump target: (a2 & 7) sb instructions before label 2 */
144+
jr t1
145+
PTR_ADDU a0, a2 /* delay slot: a0 = one past the last byte */
146+
147+
EX(sb, a1, -7(a0), .Lsmall_memset_partial_fixup)
148+
EX(sb, a1, -6(a0), .Lsmall_memset_partial_fixup)
149+
EX(sb, a1, -5(a0), .Lsmall_memset_partial_fixup)
150+
EX(sb, a1, -4(a0), .Lsmall_memset_partial_fixup)
151+
EX(sb, a1, -3(a0), .Lsmall_memset_partial_fixup)
152+
EX(sb, a1, -2(a0), .Lsmall_memset_partial_fixup)
153+
EX(sb, a1, -1(a0), .Lsmall_memset_partial_fixup)
154+
155+
2: jr ra /* done */
156+
move a2, zero /* delay slot: a2 = 0 on success */
157+
END(memset)
158+
159+
.Lsmall_memset_fixup: /* handler registered for the sdl at 7(a0) in .Lsmall_memset */
160+
PTR_ADDIU t0, a0, 8
161+
.set push
162+
.set arch=mips64r2
163+
dins t0, zero, 0, 3 /* t0 = (a0 + 8) & ~7: first 8-byte boundary above a0 */
164+
.set pop
165+
LONG_ADDU a2, a0 /* a2 = a0 + size: one past the end of the area */
166+
jr ra
167+
LONG_SUBU a2, t0 /* delay slot: a2 = end - t0; presumably the bytes left unset - confirm with callers */
168+
169+
.Lsmall_memset_partial_fixup: /* handler registered for the sb stores in .Lsmall_memset */
170+
PTR_L t0, TI_TASK($28) /* pointer loaded from TI_TASK($28); presumably the current task - confirm */
171+
LONG_L t0, THREAD_BUADDR(t0) /* value at THREAD_BUADDR; presumably the faulting address - confirm */
172+
jr ra
173+
LONG_SUBU a2, a0, t0 /* delay slot: a2 = a0 - t0, where a0 was already advanced to the end of the area */
174+
175+
.Lfirst_fixup: /* handler registered for the first sdr at (a0) in both the main and the small path */
176+
jr ra /* return with a2 untouched: it still holds the size passed in */
177+
nop /* delay slot */
178+
179+
.Lsecond_fixup: /* handler registered for the head's aligned store at (t1); a0/a2 are not yet adjusted there */
180+
LONG_ADDU a2, a0 /* a2 = a0 + size: one past the end of the area */
181+
jr ra
182+
LONG_SUBU a2, t1 /* delay slot: a2 = end - t1, the span from the faulting store's address to the end */
183+
184+
.Lfwd_fixup: /* handler registered for the stores in the 128-byte block loop */
185+
PTR_L t0, TI_TASK($28) /* pointer loaded from TI_TASK($28); presumably the current task - confirm */
186+
andi a2, 0x7f /* a2 = bytes that lie beyond the whole 128B blocks */
187+
LONG_L t0, THREAD_BUADDR(t0) /* value at THREAD_BUADDR; presumably the faulting address - confirm */
188+
LONG_ADDU a2, t1 /* + t1, which the loop set up as the end address of the blocks */
189+
jr ra
190+
LONG_SUBU a2, t0 /* delay slot: a2 = (a2 & 0x7f) + t1 - t0 */
191+
192+
.Lpartial_fixup: /* handler registered for the stores entered through the computed jump in .Lmemset_partial */
193+
PTR_L t0, TI_TASK($28) /* pointer loaded from TI_TASK($28); presumably the current task - confirm */
194+
andi a2, STORMASK /* a2 = bytes that lie beyond the 16-byte units */
195+
LONG_L t0, THREAD_BUADDR(t0) /* value at THREAD_BUADDR; presumably the faulting address - confirm */
196+
LONG_ADDU a2, a0 /* + a0, which was already advanced to the end of the 16-byte-unit region */
197+
jr ra
198+
LONG_SUBU a2, t0 /* delay slot: a2 = (a2 & STORMASK) + a0 - t0 */
199+
200+
.Llast_fixup: /* handler registered for the final sdl at -1(a0); a2 is the 1..15-byte tail count there */
201+
jr ra
202+
andi a2, 0x7 /* delay slot: a2 &= 7, the part of the tail that only this partial store covers */
203+
204+
.Lnotlast_fixup: /* handler registered for the tail's aligned store at (t1) */
205+
jr ra
206+
PTR_SUBU a2, a0, t1 /* delay slot: a2 = a0 - t1, the span from the faulting store's address to the end (a0) */

0 commit comments

Comments
 (0)