Skip to content

Commit 52b2d91

Browse files
committed
parisc: Do not hardcode registers in checksum functions
Do not hardcode processor registers r19 to r22 as scratch registers. Instead, let the compiler decide which registers to use, which may give better optimization results when the functions get inlined. Signed-off-by: Helge Deller <[email protected]>
1 parent 4f5cafb commit 52b2d91

File tree

1 file changed

+52
-49
lines changed

1 file changed

+52
-49
lines changed

arch/parisc/include/asm/checksum.h

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -42,31 +42,32 @@ extern __wsum csum_partial_copy_from_user(const void __user *src,
4242
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
4343
{
4444
unsigned int sum;
45+
unsigned long t0, t1, t2;
4546

4647
__asm__ __volatile__ (
4748
" ldws,ma 4(%1), %0\n"
4849
" addib,<= -4, %2, 2f\n"
4950
"\n"
50-
" ldws 4(%1), %%r20\n"
51-
" ldws 8(%1), %%r21\n"
52-
" add %0, %%r20, %0\n"
53-
" ldws,ma 12(%1), %%r19\n"
54-
" addc %0, %%r21, %0\n"
55-
" addc %0, %%r19, %0\n"
56-
"1: ldws,ma 4(%1), %%r19\n"
51+
" ldws 4(%1), %4\n"
52+
" ldws 8(%1), %5\n"
53+
" add %0, %4, %0\n"
54+
" ldws,ma 12(%1), %3\n"
55+
" addc %0, %5, %0\n"
56+
" addc %0, %3, %0\n"
57+
"1: ldws,ma 4(%1), %3\n"
5758
" addib,< 0, %2, 1b\n"
58-
" addc %0, %%r19, %0\n"
59+
" addc %0, %3, %0\n"
5960
"\n"
60-
" extru %0, 31, 16, %%r20\n"
61-
" extru %0, 15, 16, %%r21\n"
62-
" addc %%r20, %%r21, %0\n"
63-
" extru %0, 15, 16, %%r21\n"
64-
" add %0, %%r21, %0\n"
61+
" extru %0, 31, 16, %4\n"
62+
" extru %0, 15, 16, %5\n"
63+
" addc %4, %5, %0\n"
64+
" extru %0, 15, 16, %5\n"
65+
" add %0, %5, %0\n"
6566
" subi -1, %0, %0\n"
6667
"2:\n"
67-
: "=r" (sum), "=r" (iph), "=r" (ihl)
68+
: "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (t0), "=r" (t1), "=r" (t2)
6869
: "1" (iph), "2" (ihl)
69-
: "r19", "r20", "r21", "memory");
70+
: "memory");
7071

7172
return (__force __sum16)sum;
7273
}
@@ -126,6 +127,10 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
126127
__u32 len, __u8 proto,
127128
__wsum sum)
128129
{
130+
unsigned long t0, t1, t2, t3;
131+
132+
len += proto; /* add 16-bit proto + len */
133+
129134
__asm__ __volatile__ (
130135

131136
#if BITS_PER_LONG > 32
@@ -136,20 +141,19 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
136141
** Try to keep 4 registers with "live" values ahead of the ALU.
137142
*/
138143

139-
" ldd,ma 8(%1), %%r19\n" /* get 1st saddr word */
140-
" ldd,ma 8(%2), %%r20\n" /* get 1st daddr word */
141-
" add %8, %3, %3\n"/* add 16-bit proto + len */
142-
" add %%r19, %0, %0\n"
143-
" ldd,ma 8(%1), %%r21\n" /* 2cd saddr */
144-
" ldd,ma 8(%2), %%r22\n" /* 2cd daddr */
145-
" add,dc %%r20, %0, %0\n"
146-
" add,dc %%r21, %0, %0\n"
147-
" add,dc %%r22, %0, %0\n"
144+
" ldd,ma 8(%1), %4\n" /* get 1st saddr word */
145+
" ldd,ma 8(%2), %5\n" /* get 1st daddr word */
146+
" add %4, %0, %0\n"
147+
" ldd,ma 8(%1), %6\n" /* 2nd saddr */
148+
" ldd,ma 8(%2), %7\n" /* 2nd daddr */
149+
" add,dc %5, %0, %0\n"
150+
" add,dc %6, %0, %0\n"
151+
" add,dc %7, %0, %0\n"
148152
" add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */
149-
" extrd,u %0, 31, 32, %%r19\n" /* copy upper half down */
150-
" depdi 0, 31, 32, %0\n" /* clear upper half */
151-
" add %%r19, %0, %0\n" /* fold into 32-bits */
152-
" addc 0, %0, %0\n" /* add carry */
153+
" extrd,u %0, 31, 32, %4\n"/* copy upper half down */
154+
" depdi 0, 31, 32, %0\n"/* clear upper half */
155+
" add %4, %0, %0\n" /* fold into 32-bits */
156+
" addc 0, %0, %0\n" /* add carry */
153157

154158
#else
155159

@@ -158,30 +162,29 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
158162
** Insn stream is serialized on the carry bit here too.
159163
** result from the previous operation (eg r0 + x)
160164
*/
161-
162-
" ldw,ma 4(%1), %%r19\n" /* get 1st saddr word */
163-
" ldw,ma 4(%2), %%r20\n" /* get 1st daddr word */
164-
" add %8, %3, %3\n" /* add 16-bit proto + len */
165-
" add %%r19, %0, %0\n"
166-
" ldw,ma 4(%1), %%r21\n" /* 2cd saddr */
167-
" addc %%r20, %0, %0\n"
168-
" ldw,ma 4(%2), %%r22\n" /* 2cd daddr */
169-
" addc %%r21, %0, %0\n"
170-
" ldw,ma 4(%1), %%r19\n" /* 3rd saddr */
171-
" addc %%r22, %0, %0\n"
172-
" ldw,ma 4(%2), %%r20\n" /* 3rd daddr */
173-
" addc %%r19, %0, %0\n"
174-
" ldw,ma 4(%1), %%r21\n" /* 4th saddr */
175-
" addc %%r20, %0, %0\n"
176-
" ldw,ma 4(%2), %%r22\n" /* 4th daddr */
177-
" addc %%r21, %0, %0\n"
178-
" addc %%r22, %0, %0\n"
165+
" ldw,ma 4(%1), %4\n" /* get 1st saddr word */
166+
" ldw,ma 4(%2), %5\n" /* get 1st daddr word */
167+
" add %4, %0, %0\n"
168+
" ldw,ma 4(%1), %6\n" /* 2nd saddr */
169+
" addc %5, %0, %0\n"
170+
" ldw,ma 4(%2), %7\n" /* 2nd daddr */
171+
" addc %6, %0, %0\n"
172+
" ldw,ma 4(%1), %4\n" /* 3rd saddr */
173+
" addc %7, %0, %0\n"
174+
" ldw,ma 4(%2), %5\n" /* 3rd daddr */
175+
" addc %4, %0, %0\n"
176+
" ldw,ma 4(%1), %6\n" /* 4th saddr */
177+
" addc %5, %0, %0\n"
178+
" ldw,ma 4(%2), %7\n" /* 4th daddr */
179+
" addc %6, %0, %0\n"
180+
" addc %7, %0, %0\n"
179181
" addc %3, %0, %0\n" /* fold in proto+len, catch carry */
180182

181183
#endif
182-
: "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len)
183-
: "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto)
184-
: "r19", "r20", "r21", "r22", "memory");
184+
: "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len),
185+
"=r" (t0), "=r" (t1), "=r" (t2), "=r" (t3)
186+
: "0" (sum), "1" (saddr), "2" (daddr), "3" (len)
187+
: "memory");
185188
return csum_fold(sum);
186189
}
187190

0 commit comments

Comments
 (0)