@@ -1601,7 +1601,7 @@ dimode_scalar_chain::compute_convert_gain ()
16011601 rtx dst = SET_DEST (def_set);
16021602
16031603 if (REG_P (src) && REG_P (dst))
1604- gain += COSTS_N_INSNS (2) - ix86_cost->sse_move ;
1604+ gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move ;
16051605 else if (REG_P (src) && MEM_P (dst))
16061606 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
16071607 else if (MEM_P (src) && REG_P (dst))
@@ -38634,6 +38634,28 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
3863438634 return true;
3863538635}
3863638636
38637+ /* Return index of MODE in the sse load/store tables. */
38638+
38639+ static inline int
38640+ sse_store_index (machine_mode mode)
38641+ {
38642+ switch (GET_MODE_SIZE (mode))
38643+ {
38644+ case 4:
38645+ return 0;
38646+ case 8:
38647+ return 1;
38648+ case 16:
38649+ return 2;
38650+ case 32:
38651+ return 3;
38652+ case 64:
38653+ return 4;
38654+ default:
38655+ return -1;
38656+ }
38657+ }
38658+
3863738659/* Return the cost of moving data of mode M between a
3863838660 register and memory. A value of 2 is the default; this cost is
3863938661 relative to those in `REGISTER_MOVE_COST'.
@@ -38677,21 +38699,9 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
3867738699 }
3867838700 if (SSE_CLASS_P (regclass))
3867938701 {
38680- int index;
38681- switch (GET_MODE_SIZE (mode))
38682- {
38683- case 4:
38684- index = 0;
38685- break;
38686- case 8:
38687- index = 1;
38688- break;
38689- case 16:
38690- index = 2;
38691- break;
38692- default:
38693- return 100;
38694- }
38702+ int index = sse_store_index (mode);
38703+ if (index == -1)
38704+ return 100;
3869538705 if (in == 2)
3869638706 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
3869738707 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
@@ -38794,8 +38804,10 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
3879438804 /* In case of copying from general_purpose_register we may emit multiple
3879538805 stores followed by single load causing memory size mismatch stall.
3879638806 Count this as arbitrarily high cost of 20. */
38797- if (targetm.class_max_nregs (class1, mode)
38798- > targetm.class_max_nregs (class2, mode))
38807+ if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
38808+ && TARGET_MEMORY_MISMATCH_STALL
38809+ && targetm.class_max_nregs (class1, mode)
38810+ > targetm.class_max_nregs (class2, mode))
3879938811 cost += 20;
3880038812
3880138813 /* In the case of FP/MMX moves, the registers actually overlap, and we
@@ -38817,12 +38829,19 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
3881738829 where integer modes in MMX/SSE registers are not tieable
3881838830 because of missing QImode and HImode moves to, from or between
3881938831 MMX/SSE registers. */
38820- return MAX (8, ix86_cost->mmxsse_to_integer);
38832+ return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
38833+ ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
3882138834
3882238835 if (MAYBE_FLOAT_CLASS_P (class1))
3882338836 return ix86_cost->fp_move;
3882438837 if (MAYBE_SSE_CLASS_P (class1))
38825- return ix86_cost->sse_move;
38838+ {
38839+ if (GET_MODE_BITSIZE (mode) <= 128)
38840+ return ix86_cost->xmm_move;
38841+ if (GET_MODE_BITSIZE (mode) <= 256)
38842+ return ix86_cost->ymm_move;
38843+ return ix86_cost->zmm_move;
38844+ }
3882638845 if (MAYBE_MMX_CLASS_P (class1))
3882738846 return ix86_cost->mmx_move;
3882838847 return 2;
@@ -44370,6 +44389,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4437044389{
4437144390 bool fp = false;
4437244391 machine_mode mode = TImode;
44392+ int index;
4437344393 if (vectype != NULL)
4437444394 {
4437544395 fp = FLOAT_TYPE_P (vectype);
@@ -44397,13 +44417,16 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4439744417 true);
4439844418
4439944419 case vector_load:
44420+ index = sse_store_index (mode);
44421+ gcc_assert (index >= 0);
4440044422 return ix86_vec_cost (mode,
44401- COSTS_N_INSNS (ix86_cost->sse_load[2 ]) / 2,
44423+ COSTS_N_INSNS (ix86_cost->sse_load[index ]) / 2,
4440244424 true);
4440344425
4440444426 case vector_store:
44427+ index = sse_store_index (mode);
4440544428 return ix86_vec_cost (mode,
44406- COSTS_N_INSNS (ix86_cost->sse_store[2 ]) / 2,
44429+ COSTS_N_INSNS (ix86_cost->sse_store[index ]) / 2,
4440744430 true);
4440844431
4440944432 case vec_to_scalar:
@@ -44414,14 +44437,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4441444437 Do that incrementally. */
4441544438 case unaligned_load:
4441644439 case vector_gather_load:
44440+ index = sse_store_index (mode);
4441744441 return ix86_vec_cost (mode,
44418- COSTS_N_INSNS (ix86_cost->sse_load[2]),
44442+ COSTS_N_INSNS
44443+ (ix86_cost->sse_unaligned_load[index]) / 2,
4441944444 true);
4442044445
4442144446 case unaligned_store:
4442244447 case vector_scatter_store:
44448+ index = sse_store_index (mode);
4442344449 return ix86_vec_cost (mode,
44424- COSTS_N_INSNS (ix86_cost->sse_store[2]),
44450+ COSTS_N_INSNS
44451+ (ix86_cost->sse_unaligned_store[index]) / 2,
4442544452 true);
4442644453
4442744454 case cond_branch_taken:
0 commit comments