Skip to content

Commit df4660d

Browse files
author
uros
committed
gcc/
* config.gcc: Support "knm".
* config/i386/driver-i386.c (host_detect_local_cpu): Detect "knm".
* config/i386/i386-c.c (ix86_target_macros_internal): Handle PROCESSOR_KNM.
* config/i386/i386.c (m_KNM): Define.
(processor_target_table): Add "knm".
(PTA_KNM): Define.
(ix86_option_override_internal): Add "knm".
(ix86_issue_rate): Add PROCESSOR_KNM.
(ix86_adjust_cost): Ditto.
(ia32_multipass_dfa_lookahead): Ditto.
(get_builtin_code_for_version): Handle PROCESSOR_KNM.
(fold_builtin_cpu): Add M_INTEL_KNM.
* config/i386/i386.h (processor_costs): Define TARGET_KNM.
(processor_type): Add PROCESSOR_KNM.
* config/i386/x86-tune.def: Add m_KNM.
* doc/invoke.texi: Add knm as x86 -march=/-mtune= CPU type.

libgcc/
* config/i386/cpuinfo.h (processor_types): Add INTEL_KNM.
* config/i386/cpuinfo.c (get_intel_cpu): Detect Knights Mill.

gcc/testsuite/
* gcc.target/i386/builtin_target.c: Test knm.
* gcc.target/i386/funcspec-56.inc: Test arch=knm.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253013 138bc75d-0d04-0410-961f-82ee72b054a4
1 parent d6680b5 commit df4660d

File tree

14 files changed

+123
-47
lines changed

14 files changed

+123
-47
lines changed

gcc/ChangeLog

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
2017-09-20 Sebastian Peryt <[email protected]>
2+
3+
* config.gcc: Support "knm".
4+
* config/i386/driver-i386.c (host_detect_local_cpu): Detect "knm".
5+
* config/i386/i386-c.c (ix86_target_macros_internal): Handle
6+
PROCESSOR_KNM.
7+
* config/i386/i386.c (m_KNM): Define.
8+
(processor_target_table): Add "knm".
9+
(PTA_KNM): Define.
10+
(ix86_option_override_internal): Add "knm".
11+
(ix86_issue_rate): Add PROCESSOR_KNM.
12+
(ix86_adjust_cost): Ditto.
13+
(ia32_multipass_dfa_lookahead): Ditto.
14+
(get_builtin_code_for_version): Handle PROCESSOR_KNM.
15+
(fold_builtin_cpu): Add M_INTEL_KNM.
16+
* config/i386/i386.h (processor_costs): Define TARGET_KNM.
17+
(processor_type): Add PROCESSOR_KNM.
18+
* config/i386/x86-tune.def: Add m_KNM.
19+
* doc/invoke.texi: Add knm as x86 -march=/-mtune= CPU type.
20+
121
2017-09-20 Richard Biener <[email protected]>
222

323
PR tree-optimization/80213
@@ -97,7 +117,7 @@
97117
* rtl.h (get_stack_check_protect): Prototype.
98118
* target.def (stack_clash_protection_final_dynamic_probe): New hook.
99119
* targhooks.c (default_stack_clash_protection_final_dynamic_probe): New.
100-
* targhooks.h (default_stack_clash_protection_final_dynamic_probe):
120+
* targhooks.h (default_stack_clash_protection_final_dynamic_probe):
101121
Prototype.
102122
* doc/tm.texi.in (TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE):
103123
Add @hook.
@@ -312,8 +332,8 @@
312332

313333
2017-09-17 Daniel Santos <[email protected]>
314334

315-
config/i386/i386.c: (xlogue_layout::STUB_NAME_MAX_LEN): Increase to 20
316-
bytes.
335+
* config/i386/i386.c (xlogue_layout::STUB_NAME_MAX_LEN):
336+
Increase to 20 bytes.
317337
(xlogue_layout::s_stub_names): Add an additional size-2 dimension.
318338
(xlogue_layout::get_stub_name): Modify to select the appropriate sse
319339
or avx version of the stub.

gcc/config.gcc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ pentium4 pentium4m pentiumpro prescott lakemont"
623623
x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
624624
bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
625625
core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
626-
sandybridge ivybridge haswell broadwell bonnell silvermont knl \
626+
sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \
627627
skylake-avx512 x86-64 native"
628628

629629
# Additional x86 processors supported by --with-cpu=. Each processor

gcc/config/i386/driver-i386.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,13 +790,20 @@ const char *host_detect_local_cpu (int argc, const char **argv)
790790
/* Knights Landing. */
791791
cpu = "knl";
792792
break;
793+
case 0x85:
794+
/* Knights Mill. */
795+
cpu = "knm";
796+
break;
793797
default:
794798
if (arch)
795799
{
796800
/* This is unknown family 0x6 CPU. */
797801
/* Assume Knights Landing. */
798802
if (has_avx512f)
799803
cpu = "knl";
804+
/* Assume Knights Mill. */
805+
else if (has_avx5124vnniw)
806+
cpu = "knm";
800807
/* Assume Skylake. */
801808
else if (has_clflushopt)
802809
cpu = "skylake";

gcc/config/i386/i386-c.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
176176
def_or_undef (parse_in, "__knl");
177177
def_or_undef (parse_in, "__knl__");
178178
break;
179+
case PROCESSOR_KNM:
180+
def_or_undef (parse_in, "__knm");
181+
def_or_undef (parse_in, "__knm__");
182+
break;
179183
case PROCESSOR_SKYLAKE_AVX512:
180184
def_or_undef (parse_in, "__skylake_avx512");
181185
def_or_undef (parse_in, "__skylake_avx512__");
@@ -292,6 +296,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
292296
case PROCESSOR_KNL:
293297
def_or_undef (parse_in, "__tune_knl__");
294298
break;
299+
case PROCESSOR_KNM:
300+
def_or_undef (parse_in, "__tune_knm__");
301+
break;
295302
case PROCESSOR_SKYLAKE_AVX512:
296303
def_or_undef (parse_in, "__tune_skylake_avx512__");
297304
break;

gcc/config/i386/i386.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2192,6 +2192,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
21922192
#define m_BONNELL (1U<<PROCESSOR_BONNELL)
21932193
#define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
21942194
#define m_KNL (1U<<PROCESSOR_KNL)
2195+
#define m_KNM (1U<<PROCESSOR_KNM)
21952196
#define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
21962197
#define m_INTEL (1U<<PROCESSOR_INTEL)
21972198

@@ -2903,6 +2904,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
29032904
{"bonnell", &atom_cost, 16, 15, 16, 7, 16},
29042905
{"silvermont", &slm_cost, 16, 15, 16, 7, 16},
29052906
{"knl", &slm_cost, 16, 15, 16, 7, 16},
2907+
{"knm", &slm_cost, 16, 15, 16, 7, 16},
29062908
{"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
29072909
{"intel", &intel_cost, 16, 15, 16, 7, 16},
29082910
{"geode", &geode_cost, 0, 0, 0, 0, 0},
@@ -5352,6 +5354,8 @@ ix86_option_override_internal (bool main_args_p,
53525354
(PTA_CORE2 | PTA_MOVBE)
53535355
#define PTA_SILVERMONT \
53545356
(PTA_WESTMERE | PTA_MOVBE)
5357+
#define PTA_KNM \
5358+
(PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
53555359

53565360
/* if this reaches 64, need to widen struct pta flags below */
53575361

@@ -5422,6 +5426,7 @@ ix86_option_override_internal (bool main_args_p,
54225426
{"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
54235427
{"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
54245428
{"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
5429+
{"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
54255430
{"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
54265431
{"geode", PROCESSOR_GEODE, CPU_GEODE,
54275432
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
@@ -30282,6 +30287,7 @@ ix86_issue_rate (void)
3028230287
case PROCESSOR_BONNELL:
3028330288
case PROCESSOR_SILVERMONT:
3028430289
case PROCESSOR_KNL:
30290+
case PROCESSOR_KNM:
3028530291
case PROCESSOR_INTEL:
3028630292
case PROCESSOR_K6:
3028730293
case PROCESSOR_BTVER2:
@@ -30648,6 +30654,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
3064830654

3064930655
case PROCESSOR_SILVERMONT:
3065030656
case PROCESSOR_KNL:
30657+
case PROCESSOR_KNM:
3065130658
case PROCESSOR_INTEL:
3065230659
if (!reload_completed)
3065330660
return cost;
@@ -30719,6 +30726,7 @@ ia32_multipass_dfa_lookahead (void)
3071930726
case PROCESSOR_BONNELL:
3072030727
case PROCESSOR_SILVERMONT:
3072130728
case PROCESSOR_KNL:
30729+
case PROCESSOR_KNM:
3072230730
case PROCESSOR_INTEL:
3072330731
/* Generally, we want haifa-sched:max_issue() to look ahead as far
3072430732
as many instructions can be executed on a cycle, i.e.,
@@ -33844,6 +33852,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
3384433852
arg_str = "knl";
3384533853
priority = P_PROC_AVX512F;
3384633854
break;
33855+
case PROCESSOR_KNM:
33856+
arg_str = "knm";
33857+
priority = P_PROC_AVX512F;
33858+
break;
3384733859
case PROCESSOR_SILVERMONT:
3384833860
arg_str = "silvermont";
3384933861
priority = P_PROC_SSE4_2;
@@ -34527,6 +34539,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
3452734539
M_AMD_BTVER1,
3452834540
M_AMD_BTVER2,
3452934541
M_AMDFAM17H,
34542+
M_INTEL_KNM,
3453034543
M_CPU_SUBTYPE_START,
3453134544
M_INTEL_COREI7_NEHALEM,
3453234545
M_INTEL_COREI7_WESTMERE,
@@ -34570,6 +34583,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
3457034583
{"bonnell", M_INTEL_BONNELL},
3457134584
{"silvermont", M_INTEL_SILVERMONT},
3457234585
{"knl", M_INTEL_KNL},
34586+
{"knm", M_INTEL_KNM},
3457334587
{"amdfam10h", M_AMDFAM10H},
3457434588
{"barcelona", M_AMDFAM10H_BARCELONA},
3457534589
{"shanghai", M_AMDFAM10H_SHANGHAI},

gcc/config/i386/i386.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ extern const struct processor_costs ix86_size_cost;
351351
#define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL)
352352
#define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT)
353353
#define TARGET_KNL (ix86_tune == PROCESSOR_KNL)
354+
#define TARGET_KNM (ix86_tune == PROCESSOR_KNM)
354355
#define TARGET_SKYLAKE_AVX512 (ix86_tune == PROCESSOR_SKYLAKE_AVX512)
355356
#define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL)
356357
#define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC)
@@ -2250,6 +2251,7 @@ enum processor_type
22502251
PROCESSOR_BONNELL,
22512252
PROCESSOR_SILVERMONT,
22522253
PROCESSOR_KNL,
2254+
PROCESSOR_KNM,
22532255
PROCESSOR_SKYLAKE_AVX512,
22542256
PROCESSOR_INTEL,
22552257
PROCESSOR_GEODE,

gcc/config/i386/x86-tune.def

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
4141
/* X86_TUNE_SCHEDULE: Enable scheduling. */
4242
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
4343
m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
44-
| m_INTEL | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
44+
| m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
4545

4646
/* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
4747
on modern chips. Prefer stores affecting whole integer register
4848
over partial stores. For example prefer MOVZBL or MOVQ to load 8bit
4949
value over movb. */
5050
DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
5151
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
52-
| m_KNL | m_AMD_MULTIPLE | m_GENERIC)
52+
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
5353

5454
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
5555
destinations to be 128bit to allow register renaming on 128bit SSE units,
@@ -85,13 +85,13 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
8585
partial dependencies. */
8686
DEF_TUNE (X86_TUNE_MOVX, "movx",
8787
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
88-
| m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
88+
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
8989

9090
/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
9191
full sized loads. */
9292
DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
9393
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
94-
| m_KNL | m_AMD_MULTIPLE | m_GENERIC)
94+
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
9595

9696
/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
9797
conditional jump instruction for 32 bit TARGET.
@@ -125,7 +125,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
125125
/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
126126
during reassociation of fp computation. */
127127
DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
128-
m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1
128+
m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM |m_INTEL | m_BDVER1
129129
| m_BDVER2 | m_ZNVER1 | m_GENERIC)
130130

131131
/*****************************************************************************/
@@ -145,7 +145,7 @@ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
145145
regression on mgrid due to IRA limitation leading to unnecessary
146146
use of the frame pointer in 32bit mode. */
147147
DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
148-
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
148+
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
149149
| m_ATHLON_K8)
150150

151151
/* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
@@ -207,8 +207,8 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
207207
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
208208
than 4 branch instructions in the 16 byte window. */
209209
DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
210-
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL |m_INTEL |
211-
m_ATHLON_K8 | m_AMDFAM10)
210+
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM
211+
|m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
212212

213213
/*****************************************************************************/
214214
/* Integer instruction selection tuning */
@@ -231,22 +231,22 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
231231
/* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. */
232232
DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
233233
~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
234-
| m_KNL | m_GENERIC))
234+
| m_KNL | m_KNM | m_GENERIC))
235235

236236
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
237237
for DFmode copies */
238238
DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
239239
~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
240-
| m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
240+
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
241241

242242
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
243243
will impact LEA instruction selection. */
244244
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
245-
| m_INTEL)
245+
| m_KNM | m_INTEL)
246246

247247
/* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
248248
DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
249-
m_BONNELL | m_SILVERMONT | m_KNL)
249+
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM)
250250

251251
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
252252
vector path on AMD machines.
@@ -263,7 +263,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8",
263263
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
264264
a conditional move. */
265265
DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
266-
m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
266+
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
267267

268268
/* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such
269269
as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
@@ -281,17 +281,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
281281
/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
282282
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
283283
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
284-
| m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
284+
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
285285
| m_BTVER | m_ZNVER1 | m_GENERIC)
286286

287287
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */
288288
DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
289-
~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
289+
~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
290290
| m_K6))
291291

292292
/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
293293
DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
294-
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
294+
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
295295
| m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC)
296296

297297
/*****************************************************************************/
@@ -308,15 +308,15 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
308308
integer operand. */
309309
DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
310310
~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
311-
| m_SILVERMONT | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
311+
| m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
312312

313313
/* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */
314314
DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
315315

316316
/* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
317317
DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
318318
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
319-
| m_KNL | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
319+
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
320320

321321
/*****************************************************************************/
322322
/* SSE instruction selection tuning */
@@ -330,13 +330,13 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
330330
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead
331331
of a sequence loading registers by parts. */
332332
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
333-
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
333+
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
334334
| m_INTEL | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC)
335335

336336
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
337337
of a sequence loading registers by parts. */
338338
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
339-
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
339+
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
340340
| m_INTEL | m_BDVER | m_ZNVER1 | m_GENERIC)
341341

342342
/* Use packed single precision instructions where possible. I.e. movups instead
@@ -375,7 +375,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
375375
/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
376376
fp converts to destination register. */
377377
DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
378-
m_SILVERMONT | m_KNL | m_INTEL)
378+
m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
379379

380380
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
381381
from FP to FP. This form of instructions avoids partial write to the
@@ -389,7 +389,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
389389

390390
/* X86_TUNE_SLOW_PSHUFB: Indicates tunings with slow pshufb instruction. */
391391
DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
392-
m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
392+
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
393393

394394
/* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
395395
execute 2 or more vector instructions in parallel. */
@@ -550,4 +550,4 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
550550
/* X86_TUNE_ONE_IF_CONV_INSN: Restrict a number of cmov insns in
551551
if-converted sequence to one. */
552552
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
553-
m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC)
553+
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC)

gcc/doc/invoke.texi

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25089,6 +25089,12 @@ SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
2508925089
BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER and
2509025090
AVX512CD instruction set support.
2509125091

25092+
@item knm
25093+
Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
25094+
SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
25095+
BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER, AVX512CD,
25096+
AVX5124VNNIW, AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support.
25097+
2509225098
@item skylake-avx512
2509325099
Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
2509425100
SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,

gcc/testsuite/ChangeLog

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2017-09-20 Sebastian Peryt <[email protected]>
2+
3+
* gcc.target/i386/builtin_target.c: Test knm.
4+
* gcc.target/i386/funcspec-56.inc: Test arch=knm.
5+
16
2017-09-20 Richard Biener <[email protected]>
27

38
PR tree-optimization/77362

0 commit comments

Comments
 (0)