Skip to content

Commit 524fedd

Browse files
Alpha: Fix offset adjustment in unaligned access helpers
Correct the offset adjustment made in the multi-word unaligned access helpers such that it is actually used by the unaligned load and store instructions, fixing a bug introduced with commit 1eb356b ("alpha gprel optimizations")[1] back in 2001, which replaced address changes made directly according to the argument of the MEM expression passed with one made according to an address previously extracted from said MEM expression. The address is however incorrectly extracted from said MEM before an adjustment has been made to it for the offset supplied. This bug is usually covered by the fact that our block move and clear operations are hardly ever provided with correct block alignment data and we also usually fail to fetch that information from the MEM supplied (although PR target/115459 shows it does happen sometimes). Instead the bit alignment of 8 is usually conservatively used, meaning that a zero offset is passed to `alpha_expand_unaligned_store_words' and then code has been written such that neither `alpha_expand_unaligned_load_words' nor `alpha_expand_unaligned_store_words' can ever be called with nonzero offset from `alpha_expand_block_move'. The only situation where `alpha_expand_unaligned_store_words' can be called with nonzero offset is from `alpha_expand_block_clear' with a BWX target for a misaligned block that has been embedded in a data object of a higher alignment such that there is a small unaligned prefix our code decides to handle so as to align further stores. For instance it happens when a block clear is called for a block of 9 bytes embedded at offset 1 in a structure aligned to a 2-byte word, as illustrated by the test case included. Now this test case does not work without the change that comes next applied, because the backend cannot see the word alignment of the struct and uses the bit alignment of 8 instead.
Should this change be swapped with the next one incorrect code such as: stb $31,1($16) lda $3,1($16) ldq_u $2,8($16) ldq_u $1,1($16) mskqh $2,$3,$2 stq_u $2,8($16) mskql $1,$3,$1 stq_u $1,1($16) would be produced, where the unadjusted offsets of 1/8 can be seen with the LDQ_U/STQ_U operations along with byte masks calculated accordingly rather than the expected offsets of 2/9. As a result the byte at the offset of 9 fails to get cleared. In these circumstances this would also show as execution failures with the memclr.c test: FAIL: gcc.c-torture/execute/memclr.c -O1 execution test FAIL: gcc.c-torture/execute/memclr.c -Os execution test -- not at `-O0' though, as the higher alignment cannot be retrieved in that case, and then not at `-O2' or higher optimization levels either, because then we choose to open-code this block clear instead: ldbu $1,0($16) stw $31,8($16) stq $1,0($16) avoiding the bug in `alpha_expand_unaligned_store_words'. I am leaving the pattern match test case XFAIL-ed here for documentation purposes and it will be un-XFAIL-ed along with the fix to retrieve the correct alignment. The run test is of course never expected to fail. References: [1] <https://inbox.sourceware.org/gcc-patches/[email protected]/> gcc/ * config/alpha/alpha.cc (alpha_expand_unaligned_load_words): Move address extraction until after the MEM referred has been adjusted for the offset supplied. (alpha_expand_unaligned_store_words): Likewise. gcc/testsuite/ * gcc.target/alpha/memclr-a2-o1-c9-ptr.c: New file. * gcc.target/alpha/memclr-a2-o1-c9-run.c: New file.
1 parent 2984a3f commit 524fedd

File tree

3 files changed

+83
-8
lines changed

3 files changed

+83
-8
lines changed

gcc/config/alpha/alpha.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3625,10 +3625,6 @@ alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
36253625
rtx sreg, areg, tmp, smema;
36263626
HOST_WIDE_INT i;
36273627

3628-
smema = XEXP (smem, 0);
3629-
if (GET_CODE (smema) == LO_SUM)
3630-
smema = force_reg (Pmode, smema);
3631-
36323628
/* Generate all the tmp registers we need. */
36333629
for (i = 0; i < words; ++i)
36343630
{
@@ -3640,6 +3636,10 @@ alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
36403636
if (ofs != 0)
36413637
smem = adjust_address (smem, GET_MODE (smem), ofs);
36423638

3639+
smema = XEXP (smem, 0);
3640+
if (GET_CODE (smema) == LO_SUM)
3641+
smema = force_reg (Pmode, smema);
3642+
36433643
/* Load up all of the source data. */
36443644
for (i = 0; i < words; ++i)
36453645
{
@@ -3698,10 +3698,6 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
36983698
rtx st_addr_1, st_addr_2, dmema;
36993699
HOST_WIDE_INT i;
37003700

3701-
dmema = XEXP (dmem, 0);
3702-
if (GET_CODE (dmema) == LO_SUM)
3703-
dmema = force_reg (Pmode, dmema);
3704-
37053701
/* Generate all the tmp registers we need. */
37063702
if (data_regs != NULL)
37073703
for (i = 0; i < words; ++i)
@@ -3712,6 +3708,10 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
37123708
if (ofs != 0)
37133709
dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
37143710

3711+
dmema = XEXP (dmem, 0);
3712+
if (GET_CODE (dmema) == LO_SUM)
3713+
dmema = force_reg (Pmode, dmema);
3714+
37153715
st_addr_2 = change_address (dmem, DImode,
37163716
gen_rtx_AND (DImode,
37173717
plus_constant (DImode, dmema,
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-mbwx" } */
3+
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
4+
5+
typedef unsigned int __attribute__ ((mode (QI))) int08_t;
6+
typedef unsigned int __attribute__ ((mode (HI))) int16_t;
7+
8+
typedef struct
9+
{
10+
int08_t v[9];
11+
}
12+
s_t;
13+
14+
typedef union
15+
{
16+
struct
17+
{
18+
int08_t c[1];
19+
s_t s;
20+
int08_t d[6];
21+
};
22+
int16_t a;
23+
}
24+
u_t;
25+
26+
void __attribute__ ((noinline))
27+
memclr_a2_o1_c9 (u_t *u)
28+
{
29+
u->s = (s_t) { 0 };
30+
}
31+
32+
/* Expect assembly such as:
33+
34+
ldq_u $2,9($16)
35+
stb $31,1($16)
36+
lda $3,2($16)
37+
ldq_u $1,2($16)
38+
mskqh $2,$3,$2
39+
stq_u $2,9($16)
40+
mskql $1,$3,$1
41+
stq_u $1,2($16)
42+
43+
that is with a byte store at offset 1 and with two unaligned load/store
44+
pairs at offsets 2 and 9 each. */
45+
46+
/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
47+
/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
48+
/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
49+
/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
50+
/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
/* { dg-do run } */
/* { dg-options "" } */

#include "memclr-a2-o1-c9-ptr.c"

/* Preset every byte of the union to 0xaa so that both under- and
   overwrites of the embedded block are detectable.  */
u_t u = { { { [0] = 0xaa }, {{ [0 ... 8] = 0xaa }}, { [0 ... 5] = 0xaa } } };

/* Abort unless all N bytes at B equal WANT.  */
static void
check_bytes (const int08_t *b, int n, int08_t want)
{
  int i;

  for (i = 0; i < n; i++)
    if (b[i] != want)
      __builtin_abort ();
}

int
main (void)
{
  memclr_a2_o1_c9 (&u);
  /* Force the stores to memory before we inspect the bytes.  */
  asm ("" : : : "memory");
  check_bytes (u.c, sizeof (u.c), 0xaa);
  check_bytes (u.s.v, sizeof (u.s.v), 0x00);
  check_bytes (u.d, sizeof (u.d), 0xaa);
  return 0;
}

0 commit comments

Comments
 (0)