Skip to content

Commit d2d928c

Browse files
author
hubicka
committed
* i386.c (dimode_scalar_chain::compute_convert_gain): Use xmm_move instead of sse_move. (sse_store_index): New function. (ix86_register_move_cost): Be more sensible about mismatch stall; model AVX moves correctly; make difference between sse->integer and integer->sse. (ix86_builtin_vectorization_cost): Model correctly aligned and unaligned moves; make difference between SSE and AVX. * i386.h (processor_costs): Remove sse_move; add xmm_move, ymm_move and zmm_move. Increase size of sse load and store tables; add unaligned load and store tables; add ssemmx_to_integer. * x86-tune-costs.h: Update all entries according to real move latencies from Agner Fog's manual and chip documentation. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@254012 138bc75d-0d04-0410-961f-82ee72b054a4
1 parent e3f64b6 commit d2d928c

File tree

4 files changed

+456
-293
lines changed

4 files changed

+456
-293
lines changed

gcc/ChangeLog

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
2017-10-23 Jan Hubicka <[email protected]>
2+
3+
* i386.c (dimode_scalar_chain::compute_convert_gain): Use
4+
xmm_move instead of sse_move.
5+
(sse_store_index): New function.
6+
(ix86_register_move_cost): Be more sensible about mismatch stall;
7+
model AVX moves correctly; make difference between sse->integer and
8+
integer->sse.
9+
(ix86_builtin_vectorization_cost): Model correctly aligned and unaligned
10+
moves; make difference between SSE and AVX.
11+
* i386.h (processor_costs): Remove sse_move; add xmm_move, ymm_move
12+
and zmm_move. Increase size of sse load and store tables;
13+
add unaligned load and store tables; add ssemmx_to_integer.
14+
* x86-tune-costs.h: Update all entries according to real
15+
move latencies from Agner Fog's manual and chip documentation.
16+
117
2017-10-23 Jakub Jelinek <[email protected]>
218

319
PR target/82628

gcc/config/i386/i386.c

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,7 +1601,7 @@ dimode_scalar_chain::compute_convert_gain ()
16011601
rtx dst = SET_DEST (def_set);
16021602

16031603
if (REG_P (src) && REG_P (dst))
1604-
gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
1604+
gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
16051605
else if (REG_P (src) && MEM_P (dst))
16061606
gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
16071607
else if (MEM_P (src) && REG_P (dst))
@@ -38634,6 +38634,28 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
3863438634
return true;
3863538635
}
3863638636

38637+
/* Return index of MODE in the sse load/store tables.
   The index is chosen by GET_MODE_SIZE (MODE): sizes 4, 8, 16, 32 and 64
   map to indexes 0, 1, 2, 3 and 4 respectively.  Any other size yields
   -1, so the result must be checked before it is used as an array
   index.  */
38638+
38639+
static inline int
38640+
sse_store_index (machine_mode mode)
38641+
{
38642+
switch (GET_MODE_SIZE (mode))
38643+
{
38644+
case 4:
38645+
return 0;
38646+
case 8:
38647+
return 1;
38648+
case 16:
38649+
return 2;
38650+
case 32:
38651+
return 3;
38652+
case 64:
38653+
return 4;
38654+
default:
38655+
return -1;
38656+
}
38657+
}
38658+
3863738659
/* Return the cost of moving data of mode M between a
3863838660
register and memory. A value of 2 is the default; this cost is
3863938661
relative to those in `REGISTER_MOVE_COST'.
@@ -38677,21 +38699,9 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
3867738699
}
3867838700
if (SSE_CLASS_P (regclass))
3867938701
{
38680-
int index;
38681-
switch (GET_MODE_SIZE (mode))
38682-
{
38683-
case 4:
38684-
index = 0;
38685-
break;
38686-
case 8:
38687-
index = 1;
38688-
break;
38689-
case 16:
38690-
index = 2;
38691-
break;
38692-
default:
38693-
return 100;
38694-
}
38702+
int index = sse_store_index (mode);
38703+
if (index == -1)
38704+
return 100;
3869538705
if (in == 2)
3869638706
return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
3869738707
return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
@@ -38794,8 +38804,10 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
3879438804
/* In case of copying from general_purpose_register we may emit multiple
3879538805
stores followed by single load causing memory size mismatch stall.
3879638806
Count this as arbitrarily high cost of 20. */
38797-
if (targetm.class_max_nregs (class1, mode)
38798-
> targetm.class_max_nregs (class2, mode))
38807+
if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
38808+
&& TARGET_MEMORY_MISMATCH_STALL
38809+
&& targetm.class_max_nregs (class1, mode)
38810+
> targetm.class_max_nregs (class2, mode))
3879938811
cost += 20;
3880038812

3880138813
/* In the case of FP/MMX moves, the registers actually overlap, and we
@@ -38817,12 +38829,19 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
3881738829
where integer modes in MMX/SSE registers are not tieable
3881838830
because of missing QImode and HImode moves to, from or between
3881938831
MMX/SSE registers. */
38820-
return MAX (8, ix86_cost->mmxsse_to_integer);
38832+
return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
38833+
? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
3882138834

3882238835
if (MAYBE_FLOAT_CLASS_P (class1))
3882338836
return ix86_cost->fp_move;
3882438837
if (MAYBE_SSE_CLASS_P (class1))
38825-
return ix86_cost->sse_move;
38838+
{
38839+
if (GET_MODE_BITSIZE (mode) <= 128)
38840+
return ix86_cost->xmm_move;
38841+
if (GET_MODE_BITSIZE (mode) <= 256)
38842+
return ix86_cost->ymm_move;
38843+
return ix86_cost->zmm_move;
38844+
}
3882638845
if (MAYBE_MMX_CLASS_P (class1))
3882738846
return ix86_cost->mmx_move;
3882838847
return 2;
@@ -44370,6 +44389,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4437044389
{
4437144390
bool fp = false;
4437244391
machine_mode mode = TImode;
44392+
int index;
4437344393
if (vectype != NULL)
4437444394
{
4437544395
fp = FLOAT_TYPE_P (vectype);
@@ -44397,13 +44417,16 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4439744417
true);
4439844418

4439944419
case vector_load:
44420+
index = sse_store_index (mode);
44421+
gcc_assert (index >= 0);
4440044422
return ix86_vec_cost (mode,
44401-
COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
44423+
COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
4440244424
true);
4440344425

4440444426
case vector_store:
44427+
index = sse_store_index (mode);
4440544428
return ix86_vec_cost (mode,
44406-
COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
44429+
COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
4440744430
true);
4440844431

4440944432
case vec_to_scalar:
@@ -44414,14 +44437,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4441444437
Do that incrementally. */
4441544438
case unaligned_load:
4441644439
case vector_gather_load:
44440+
index = sse_store_index (mode);
4441744441
return ix86_vec_cost (mode,
44418-
COSTS_N_INSNS (ix86_cost->sse_load[2]),
44442+
COSTS_N_INSNS
44443+
(ix86_cost->sse_unaligned_load[index]) / 2,
4441944444
true);
4442044445

4442144446
case unaligned_store:
4442244447
case vector_scatter_store:
44448+
index = sse_store_index (mode);
4442344449
return ix86_vec_cost (mode,
44424-
COSTS_N_INSNS (ix86_cost->sse_store[2]),
44450+
COSTS_N_INSNS
44451+
(ix86_cost->sse_unaligned_store[index]) / 2,
4442544452
true);
4442644453

4442744454
case cond_branch_taken:

gcc/config/i386/i386.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -242,13 +242,17 @@ struct processor_costs {
242242
in SImode and DImode */
243243
const int mmx_store[2]; /* cost of storing MMX register
244244
in SImode and DImode */
245-
const int sse_move; /* cost of moving SSE register. */
246-
const int sse_load[3]; /* cost of loading SSE register
247-
in SImode, DImode and TImode*/
248-
const int sse_store[3]; /* cost of storing SSE register
249-
in SImode, DImode and TImode*/
245+
const int xmm_move, ymm_move, /* cost of moving XMM, YMM and ZMM register. */
246+
zmm_move;
247+
const int sse_load[5]; /* cost of loading SSE register
248+
in 32bit, 64bit, 128bit, 256bit and 512bit */
249+
const int sse_unaligned_load[5];/* cost of unaligned load. */
250+
const int sse_store[5]; /* cost of storing SSE register
251+
in 32bit, 64bit, 128bit, 256bit and 512bit */
252+
const int sse_unaligned_store[5];/* cost of unaligned store. */
250253
const int mmxsse_to_integer; /* cost of moving mmxsse register to
251-
integer and vice versa. */
254+
integer. */
255+
const int ssemmx_to_integer; /* cost of moving integer to mmxsse register. */
252256
const int l1_cache_size; /* size of l1 cache, in kilobytes. */
253257
const int l2_cache_size; /* size of l2 cache, in kilobytes. */
254258
const int prefetch_block; /* bytes moved to cache for prefetch. */

0 commit comments

Comments
 (0)