@@ -41,15 +41,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
4141/* X86_TUNE_SCHEDULE: Enable scheduling. */
4242DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
4343 m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
44- | m_INTEL | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
44+ | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
4545
4646/* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
4747 on modern chips. Prefer stores affecting whole integer register
4848 over partial stores. For example prefer MOVZBL or MOVQ to load 8bit
4949 value over movb. */
5050DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
5151 m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
52- | m_KNL | m_AMD_MULTIPLE | m_GENERIC)
52+ | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
5353
5454/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
5555 destinations to be 128bit to allow register renaming on 128bit SSE units,
@@ -85,13 +85,13 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
8585 partial dependencies. */
8686DEF_TUNE (X86_TUNE_MOVX, "movx",
8787 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
88- | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
88+ | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
8989
9090/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
9191 full sized loads. */
9292DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
9393 m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
94- | m_KNL | m_AMD_MULTIPLE | m_GENERIC)
94+ | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
9595
9696/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
9797 conditional jump instruction for 32 bit TARGET.
@@ -125,7 +125,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
125125/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
126126 during reassociation of fp computation. */
127127DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
128- m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1
128+ m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM | m_INTEL | m_BDVER1
129129 | m_BDVER2 | m_ZNVER1 | m_GENERIC)
130130
131131/*****************************************************************************/
@@ -145,7 +145,7 @@ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
145145 regression on mgrid due to IRA limitation leading to unnecessary
146146 use of the frame pointer in 32bit mode. */
147147DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
148- m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
148+ m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
149149 | m_ATHLON_K8)
150150
151151/* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
@@ -207,8 +207,8 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
207207/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
208208 than 4 branch instructions in the 16 byte window. */
209209DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
210- m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL |m_INTEL |
211- m_ATHLON_K8 | m_AMDFAM10)
210+ m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM
211+ |m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
212212
213213/*****************************************************************************/
214214/* Integer instruction selection tuning */
@@ -231,22 +231,22 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
231231/* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. */
232232DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
233233 ~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
234- | m_KNL | m_GENERIC))
234+ | m_KNL | m_KNM | m_GENERIC))
235235
236236/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
237237 for DFmode copies */
238238DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
239239 ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
240- | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
240+ | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
241241
242242/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
243243 will impact LEA instruction selection. */
244244DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
245- | m_INTEL)
245+ | m_KNM | m_INTEL)
246246
247247/* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
248248DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
249- m_BONNELL | m_SILVERMONT | m_KNL)
249+ m_BONNELL | m_SILVERMONT | m_KNL | m_KNM )
250250
251251/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
252252 vector path on AMD machines.
@@ -263,7 +263,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8",
263263/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
264264 a conditional move. */
265265DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
266- m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
266+ m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
267267
268268/* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such
269269 as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
@@ -281,17 +281,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
281281/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
282282DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
283283 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
284- | m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
284+ | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
285285 | m_BTVER | m_ZNVER1 | m_GENERIC)
286286
287287/* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */
288288DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
289- ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
289+ ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
290290 | m_K6))
291291
292292/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
293293DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
294- m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
294+ m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
295295 | m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC)
296296
297297/*****************************************************************************/
@@ -308,15 +308,15 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
308308 integer operand. */
309309DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
310310 ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
311- | m_SILVERMONT | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
311+ | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
312312
313313/* X86_TUNE_USE_FFREEP: Use ffreep instruction instead of fstp. */
314314DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
315315
316316/* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
317317DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
318318 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
319- | m_KNL | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
319+ | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
320320
321321/*****************************************************************************/
322322/* SSE instruction selection tuning */
@@ -330,13 +330,13 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
330330/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead
331331 of a sequence loading registers by parts. */
332332DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
333- m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
333+ m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
334334 | m_INTEL | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC)
335335
336336/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
337337 of a sequence loading registers by parts. */
338338DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
339- m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
339+ m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
340340 | m_INTEL | m_BDVER | m_ZNVER1 | m_GENERIC)
341341
342342/* Use packed single precision instructions where possible. I.e. movups instead
@@ -375,7 +375,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
375375/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
376376 fp converts to destination register. */
377377DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
378- m_SILVERMONT | m_KNL | m_INTEL)
378+ m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
379379
380380/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
381381 from FP to FP. This form of instructions avoids partial write to the
@@ -389,7 +389,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
389389
390390/* X86_TUNE_SLOW_PSHUFB: Indicates tunings with slow pshufb instruction. */
391391DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
392- m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
392+ m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
393393
394394/* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
395395 execute 2 or more vector instructions in parallel. */
@@ -550,4 +550,4 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
550550/* X86_TUNE_ONE_IF_CONV_INSN: Restrict the number of cmov insns in
551551 if-converted sequence to one. */
552552DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
553- m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC)
553+ m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC)
0 commit comments