|
| 1 | +/* |
| 2 | + * This file is subject to the terms and conditions of the GNU General Public |
| 3 | + * License. See the file "COPYING" in the main directory of this archive |
| 4 | + * for more details. |
| 5 | + * |
| 6 | + * Copyright (C) 1998, 1999, 2000 by Ralf Baechle |
| 7 | + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. |
| 8 | + * Copyright (C) 2007 by Maciej W. Rozycki |
| 9 | + * Copyright (C) 2011, 2012 MIPS Technologies, Inc. |
| 10 | + */ |
| 11 | +#include <asm/asm.h> |
| 12 | +#include <asm/asm-offsets.h> |
| 13 | +#include <asm/export.h> |
| 14 | +#include <asm/regdef.h> |
| 15 | +/* Partial doubleword stores used for the ragged head and tail (this sdl/sdr assignment presumably assumes little-endian -- confirm). */ |
| 16 | +#define LONG_S_L sdl |
| 17 | +#define LONG_S_R sdr |
| 18 | +/* STORSIZE is the address stride between consecutive stores in f_fill128; STORMASK is STORSIZE - 1. */ |
| 19 | +#define STORSIZE 16 |
| 20 | +#define STORMASK 15 |
| 21 | +/* EX: emit "insn reg, addr" at local label 9 and record the pair (9b, handler) in the __ex_table section. */ |
| 22 | +#define EX(insn,reg,addr,handler) \ |
| 23 | +9: insn reg, addr; \ |
| 24 | + .section __ex_table,"a"; \ |
| 25 | + PTR 9b, handler; \ |
| 26 | + .previous |
| 27 | +/* EX_GSSQ: emit "gssq reg, reg, addr" at local label 9 under a temporary arch=loongson3a and record (9b, handler) in __ex_table. */ |
| 28 | +#define EX_GSSQ(reg, addr, handler) \ |
| 29 | + .set push; \ |
| 30 | + .set arch=loongson3a; \ |
| 31 | +9: gssq reg, reg, addr; \ |
| 32 | + .set pop; \ |
| 33 | + .section __ex_table,"a"; \ |
| 34 | + PTR 9b, handler; \ |
| 35 | + .previous |
| 36 | +/* f_fill128: eight EX_GSSQ stores of \val at \dst + \offset + k * STORSIZE for k = 0..7; \fixup is the exception-table handler for each store. */ |
| 37 | + .macro f_fill128 dst, offset, val, fixup |
| 38 | + EX_GSSQ(\val, (\offset + 0 * STORSIZE)(\dst), \fixup) |
| 39 | + EX_GSSQ(\val, (\offset + 1 * STORSIZE)(\dst), \fixup) |
| 40 | + EX_GSSQ(\val, (\offset + 2 * STORSIZE)(\dst), \fixup) |
| 41 | + EX_GSSQ(\val, (\offset + 3 * STORSIZE)(\dst), \fixup) |
| 42 | + EX_GSSQ(\val, (\offset + 4 * STORSIZE)(\dst), \fixup) |
| 43 | + EX_GSSQ(\val, (\offset + 5 * STORSIZE)(\dst), \fixup) |
| 44 | + EX_GSSQ(\val, (\offset + 6 * STORSIZE)(\dst), \fixup) |
| 45 | + EX_GSSQ(\val, (\offset + 7 * STORSIZE)(\dst), \fixup) |
| 46 | + .endm |
| 47 | + |
| 48 | +/* |
| 49 | + * memset(void *s, int c, size_t n) |
| 50 | + * Second entry point __bzero starts after the fill-byte replication and uses a1 as given. |
| 51 | + * a0: start of area to clear |
| 52 | + * a1: char to fill with |
| 53 | + * a2: size of area to clear |
| 54 | + * Returns v0 = s (memset entry only); a2 = 0 on normal return, or the value left by a fixup handler after a faulting store. */ |
| 55 | + .set noreorder /* branch delay slots below are filled by hand */ |
| 56 | + .align 5 |
| 57 | +LEAF(memset) |
| 58 | +EXPORT_SYMBOL(memset) |
| 59 | + beqz a1, 1f /* a1 == 0: nothing to replicate */ |
| 60 | + move v0, a0 /* result */ |
| 61 | + /* Replicate the low byte of a1 into all eight bytes of the register. */ |
| 62 | + andi a1, 0xff /* spread fillword */ |
| 63 | + LONG_SLL t1, a1, 8 |
| 64 | + or a1, t1 |
| 65 | + LONG_SLL t1, a1, 16 |
| 66 | + or a1, t1 |
| 67 | + LONG_SLL t1, a1, 32 |
| 68 | + or a1, t1 |
| 69 | +1: |
| 70 | + /* __bzero entry: a1 already holds the full fill pattern. */ |
| 71 | +FEXPORT(__bzero) |
| 72 | +EXPORT_SYMBOL(__bzero) |
| 73 | + sltiu t0, a2, STORSIZE /* very small region? */ |
| 74 | + bnez t0, .Lsmall_memset |
| 75 | + andi t0, a0, STORMASK /* aligned? */ |
| 76 | + /* From here a2 >= STORSIZE and t0 = a0 & STORMASK. */ |
| 77 | + .set noat |
| 78 | + li AT, STORSIZE |
| 79 | + beqz t0, 1f /* already STORSIZE-aligned: skip the head stores */ |
| 80 | + PTR_SUBU t0, AT /* alignment in bytes (delay slot): t0 = misalignment - STORSIZE, a negative count */ |
| 81 | + .set at |
| 82 | + /* Head: LONG_S_R at a0, then one aligned doubleword at (a0 + 8) & ~7. */ |
| 83 | + EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword 8B aligned */ |
| 84 | + .set push |
| 85 | + .set arch=mips64r2 |
| 86 | + PTR_ADDIU t1, a0, 8 |
| 87 | + dins t1, zero, 0, 3 /* clear the low three bits of t1 */ |
| 88 | + .set pop |
| 89 | + EX(LONG_S, a1, (t1), .Lsecond_fixup) /* May double copy 8B */ |
| 90 | + /* t0 is negative here, so a0 moves up to the STORSIZE boundary and a2 shrinks by the same amount. */ |
| 91 | + PTR_SUBU a0, t0 /* long align ptr */ |
| 92 | + PTR_ADDU a2, t0 /* correct size */ |
| 93 | + /* t1 = a2 & ~0x7f (bytes in whole 128-byte blocks); t0 = a2 & 0x70 (bytes in whole STORSIZE chunks after them). */ |
| 94 | +1: ori t1, a2, 0x7f /* # of full blocks */ |
| 95 | + xori t1, 0x7f |
| 96 | + beqz t1, .Lmemset_partial /* no block to fill */ |
| 97 | + andi t0, a2, 0x80-STORSIZE /* (delay slot) computed on both paths */ |
| 98 | + /* Main loop: 128 bytes per iteration until a0 reaches the end address in t1. */ |
| 99 | + PTR_ADDU t1, a0 /* end address */ |
| 100 | + .set reorder /* let the assembler handle the bne delay slot */ |
| 101 | +1: PTR_ADDIU a0, 128 |
| 102 | + f_fill128 a0, -128, a1, .Lfwd_fixup |
| 103 | + bne t1, a0, 1b |
| 104 | + .set noreorder |
| 105 | + /* Computed jump into the unrolled stores below so that only the last t0 / STORSIZE of them execute. */ |
| 106 | +.Lmemset_partial: |
| 107 | + PTR_LA t1, 2f /* where to start */ |
| 108 | + .set noat |
| 109 | + LONG_SRL AT, t0, 2 /* t0 / 4: assumes each store is one 4-byte instruction per STORSIZE bytes */ |
| 110 | + PTR_SUBU t1, AT |
| 111 | + .set at |
| 112 | + jr t1 |
| 113 | + PTR_ADDU a0, t0 /* dest ptr */ |
| 114 | + /* nomacro: presumably guarantees one instruction per store, which the jump arithmetic above relies on. */ |
| 115 | + .set push |
| 116 | + .set noreorder |
| 117 | + .set nomacro |
| 118 | + f_fill128 a0, -128, a1, .Lpartial_fixup /* ... but first do 16Bs ... */ |
| 119 | +2: .set pop |
| 120 | + andi a2, STORMASK /* At most 15B to go */ |
| 121 | + /* Tail: aligned doubleword at (end - 8) & ~7, then LONG_S_L ending at end - 1. */ |
| 122 | + beqz a2, 1f |
| 123 | + PTR_ADDU a0, a2 /* What's left */ |
| 124 | + .set push |
| 125 | + .set arch=mips64r2 |
| 126 | + PTR_ADDIU t1, a0, -8 /* non-trapping add, same form as the head computation */ |
| 127 | + dins t1, zero, 0, 3 /* clear the low three bits of t1 */ |
| 128 | + .set pop |
| 129 | + EX(LONG_S, a1, (t1), .Lnotlast_fixup) /* May double copy 8B */ |
| 130 | + EX(LONG_S_L, a1, -1(a0), .Llast_fixup) |
| 131 | +1: jr ra |
| 132 | + move a2, zero /* (delay slot) return a2 = 0 */ |
| 133 | + /* a2 < STORSIZE: optional 8-byte unaligned store, then up to 7 byte stores reached by a computed jump. */ |
| 134 | +.Lsmall_memset: |
| 135 | + andi t1, a2, 7 |
| 136 | + beq t1, a2, 1f /* a2 < 8: byte stores only */ |
| 137 | + LONG_SLL t1, 2 /* (delay slot) t1 = (a2 & 7) * 4, code bytes of the sb stores to run */ |
| 138 | + /* a2 is 8..15 here: store eight bytes starting at a0 with a LONG_S_R / LONG_S_L pair. */ |
| 139 | + EX(LONG_S_R, a1, (a0), .Lfirst_fixup) |
| 140 | + EX(LONG_S_L, a1, 7(a0), .Lsmall_memset_fixup) |
| 141 | + /* Jump t1 code bytes before label 2 so that the last (a2 & 7) sb stores execute. */ |
| 142 | +1: PTR_LA t0, 2f |
| 143 | + PTR_SUBU t1, t0, t1 |
| 144 | + jr t1 |
| 145 | + PTR_ADDU a0, a2 /* (delay slot) a0 = end of area */ |
| 146 | + /* Byte stores addressed backwards from the end pointer. */ |
| 147 | + EX(sb, a1, -7(a0), .Lsmall_memset_partial_fixup) |
| 148 | + EX(sb, a1, -6(a0), .Lsmall_memset_partial_fixup) |
| 149 | + EX(sb, a1, -5(a0), .Lsmall_memset_partial_fixup) |
| 150 | + EX(sb, a1, -4(a0), .Lsmall_memset_partial_fixup) |
| 151 | + EX(sb, a1, -3(a0), .Lsmall_memset_partial_fixup) |
| 152 | + EX(sb, a1, -2(a0), .Lsmall_memset_partial_fixup) |
| 153 | + EX(sb, a1, -1(a0), .Lsmall_memset_partial_fixup) |
| 154 | + |
| 155 | +2: jr ra /* done */ |
| 156 | + move a2, zero /* (delay slot) return a2 = 0 */ |
| 157 | + END(memset) |
| 158 | +/* Fixup for the LONG_S_L store in .Lsmall_memset: returns a2 = a0 + a2 - ((a0 + 8) & ~7). */ |
| 159 | +.Lsmall_memset_fixup: |
| 160 | + PTR_ADDIU t0, a0, 8 |
| 161 | + .set push |
| 162 | + .set arch=mips64r2 |
| 163 | + dins t0, zero, 0, 3 /* clear the low three bits of t0 */ |
| 164 | + .set pop |
| 165 | + LONG_ADDU a2, a0 |
| 166 | + jr ra |
| 167 | + LONG_SUBU a2, t0 /* (delay slot) */ |
| 168 | +/* Fixup for the sb stores in .Lsmall_memset: returns a2 = a0 - THREAD_BUADDR value (presumably the faulting address -- confirm). */ |
| 169 | +.Lsmall_memset_partial_fixup: |
| 170 | + PTR_L t0, TI_TASK($28) /* task pointer loaded via $28 */ |
| 171 | + LONG_L t0, THREAD_BUADDR(t0) |
| 172 | + jr ra |
| 173 | + LONG_SUBU a2, a0, t0 /* (delay slot) a0 is the end-of-area pointer on this path */ |
| 174 | +/* Fixup for the leading LONG_S_R store (aligning path and small path): returns with a2 unchanged. */ |
| 175 | +.Lfirst_fixup: |
| 176 | + jr ra |
| 177 | + nop |
| 178 | +/* Fixup for the aligned LONG_S store that follows the leading LONG_S_R: returns a2 = a0 + a2 - t1. */ |
| 179 | +.Lsecond_fixup: |
| 180 | + LONG_ADDU a2, a0 |
| 181 | + jr ra |
| 182 | + LONG_SUBU a2, t1 /* (delay slot) t1 is the address that store targeted */ |
| 183 | +/* Fixup for the 128-byte loop: returns a2 = (a2 & 0x7f) + t1 - THREAD_BUADDR value, t1 being the loop's end address. */ |
| 184 | +.Lfwd_fixup: |
| 185 | + PTR_L t0, TI_TASK($28) |
| 186 | + andi a2, 0x7f /* bytes beyond the whole 128-byte blocks */ |
| 187 | + LONG_L t0, THREAD_BUADDR(t0) /* presumably the faulting address -- confirm */ |
| 188 | + LONG_ADDU a2, t1 |
| 189 | + jr ra |
| 190 | + LONG_SUBU a2, t0 /* (delay slot) */ |
| 191 | +/* Fixup for the f_fill128 pass entered by computed jump: returns a2 = (a2 & STORMASK) + a0 - THREAD_BUADDR value. */ |
| 192 | +.Lpartial_fixup: |
| 193 | + PTR_L t0, TI_TASK($28) |
| 194 | + andi a2, STORMASK /* bytes beyond the whole STORSIZE chunks */ |
| 195 | + LONG_L t0, THREAD_BUADDR(t0) /* presumably the faulting address -- confirm */ |
| 196 | + LONG_ADDU a2, a0 |
| 197 | + jr ra |
| 198 | + LONG_SUBU a2, t0 /* (delay slot) */ |
| 199 | +/* Fixup for the final LONG_S_L store of the tail: returns a2 = a2 & 7. */ |
| 200 | +.Llast_fixup: |
| 201 | + jr ra |
| 202 | + andi a2, 0x7 /* (delay slot) */ |
| 203 | +/* Fixup for the aligned LONG_S store of the tail: returns a2 = a0 - t1 (end pointer minus the store address). */ |
| 204 | +.Lnotlast_fixup: |
| 205 | + jr ra |
| 206 | + PTR_SUBU a2, a0, t1 /* (delay slot) */ |
0 commit comments