@@ -772,6 +772,10 @@ def TuningUseGLMDivSqrtCosts
772772def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
773773 "Target has branch hint feature">;
774774
// Tuning flag exposed as the "avoid-mfence" feature string; when enabled it
// sets the subtarget's AvoidMFence field to "true". Per its description, it
// asks for a `lock or` to be used instead of MFENCE for seq_cst fences.
775+ def TuningAvoidMFENCE
776+ : SubtargetFeature<"avoid-mfence", "AvoidMFence", "true",
777+ "Avoid MFENCE for fence seq_cst, and instead use lock or">;
778+
775779//===----------------------------------------------------------------------===//
776780// X86 CPU Families
777781// TODO: Remove these - use general tuning features to determine codegen.
@@ -833,7 +837,8 @@ def ProcessorFeatures {
833837 TuningSlow3OpsLEA,
834838 TuningSlowDivide64,
835839 TuningSlowIncDec,
836- TuningInsertVZEROUPPER
840+ TuningInsertVZEROUPPER,
841+ TuningAvoidMFENCE
837842 ];
838843
839844 list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
@@ -849,7 +854,8 @@ def ProcessorFeatures {
849854 TuningFastSHLDRotate,
850855 TuningFast15ByteNOP,
851856 TuningPOPCNTFalseDeps,
852- TuningInsertVZEROUPPER
857+ TuningInsertVZEROUPPER,
858+ TuningAvoidMFENCE
853859 ];
854860
855861 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
@@ -868,7 +874,8 @@ def ProcessorFeatures {
868874 TuningPOPCNTFalseDeps,
869875 TuningLZCNTFalseDeps,
870876 TuningInsertVZEROUPPER,
871- TuningAllowLight256Bit
877+ TuningAllowLight256Bit,
878+ TuningAvoidMFENCE
872879 ];
873880
874881 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
@@ -892,15 +899,17 @@ def ProcessorFeatures {
892899 TuningFastGather,
893900 TuningPOPCNTFalseDeps,
894901 TuningInsertVZEROUPPER,
895- TuningAllowLight256Bit
902+ TuningAllowLight256Bit,
903+ TuningAvoidMFENCE
896904 ];
897905
898906 // Nehalem
899907 list<SubtargetFeature> NHMFeatures = X86_64V2Features;
900908 list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
901909 TuningSlowDivide64,
902910 TuningInsertVZEROUPPER,
903- TuningNoDomainDelayMov];
911+ TuningNoDomainDelayMov,
912+ TuningAvoidMFENCE];
904913
905914 // Westmere
906915 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
@@ -921,7 +930,8 @@ def ProcessorFeatures {
921930 TuningFast15ByteNOP,
922931 TuningPOPCNTFalseDeps,
923932 TuningInsertVZEROUPPER,
924- TuningNoDomainDelayMov];
933+ TuningNoDomainDelayMov,
934+ TuningAvoidMFENCE];
925935 list<SubtargetFeature> SNBFeatures =
926936 !listconcat(WSMFeatures, SNBAdditionalFeatures);
927937
@@ -987,7 +997,8 @@ def ProcessorFeatures {
987997 TuningAllowLight256Bit,
988998 TuningNoDomainDelayMov,
989999 TuningNoDomainDelayShuffle,
990- TuningNoDomainDelayBlend];
1000+ TuningNoDomainDelayBlend,
1001+ TuningAvoidMFENCE];
9911002 list<SubtargetFeature> SKLFeatures =
9921003 !listconcat(BDWFeatures, SKLAdditionalFeatures);
9931004
@@ -1022,7 +1033,8 @@ def ProcessorFeatures {
10221033 TuningNoDomainDelayMov,
10231034 TuningNoDomainDelayShuffle,
10241035 TuningNoDomainDelayBlend,
1025- TuningFastImmVectorShift];
1036+ TuningFastImmVectorShift,
1037+ TuningAvoidMFENCE];
10261038 list<SubtargetFeature> SKXFeatures =
10271039 !listconcat(BDWFeatures, SKXAdditionalFeatures);
10281040
@@ -1065,7 +1077,8 @@ def ProcessorFeatures {
10651077 TuningNoDomainDelayMov,
10661078 TuningNoDomainDelayShuffle,
10671079 TuningNoDomainDelayBlend,
1068- TuningFastImmVectorShift];
1080+ TuningFastImmVectorShift,
1081+ TuningAvoidMFENCE];
10691082 list<SubtargetFeature> CNLFeatures =
10701083 !listconcat(SKLFeatures, CNLAdditionalFeatures);
10711084
@@ -1094,7 +1107,8 @@ def ProcessorFeatures {
10941107 TuningNoDomainDelayMov,
10951108 TuningNoDomainDelayShuffle,
10961109 TuningNoDomainDelayBlend,
1097- TuningFastImmVectorShift];
1110+ TuningFastImmVectorShift,
1111+ TuningAvoidMFENCE];
10981112 list<SubtargetFeature> ICLFeatures =
10991113 !listconcat(CNLFeatures, ICLAdditionalFeatures);
11001114
@@ -1268,7 +1282,8 @@ def ProcessorFeatures {
12681282 // Tremont
12691283 list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
12701284 FeatureGFNI];
// Tremont tuning is no longer a plain alias of GLPTuning: it is GLPTuning
// with the Tremont-only additions (currently just TuningAvoidMFENCE) appended.
1271- list<SubtargetFeature> TRMTuning = GLPTuning;
1285+ list<SubtargetFeature> TRMAdditionalTuning = [TuningAvoidMFENCE];
1286+ list<SubtargetFeature> TRMTuning = !listconcat(GLPTuning, TRMAdditionalTuning);
12721287 list<SubtargetFeature> TRMFeatures =
12731288 !listconcat(GLPFeatures, TRMAdditionalFeatures);
12741289
@@ -1446,7 +1461,8 @@ def ProcessorFeatures {
14461461 TuningFastImm16,
14471462 TuningSBBDepBreaking,
14481463 TuningSlowDivide64,
1449- TuningSlowSHLD];
1464+ TuningSlowSHLD,
1465+ TuningAvoidMFENCE];
14501466 list<SubtargetFeature> BtVer2Features =
14511467 !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
14521468
@@ -1475,7 +1491,8 @@ def ProcessorFeatures {
14751491 TuningFastScalarShiftMasks,
14761492 TuningBranchFusion,
14771493 TuningSBBDepBreaking,
1478- TuningInsertVZEROUPPER];
1494+ TuningInsertVZEROUPPER,
1495+ TuningAvoidMFENCE];
14791496
14801497 // PileDriver
14811498 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
@@ -1555,7 +1572,8 @@ def ProcessorFeatures {
15551572 TuningSlowSHLD,
15561573 TuningSBBDepBreaking,
15571574 TuningInsertVZEROUPPER,
1558- TuningAllowLight256Bit];
1575+ TuningAllowLight256Bit,
1576+ TuningAvoidMFENCE];
15591577 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
15601578 FeatureRDPID,
15611579 FeatureRDPRU,
@@ -1740,7 +1758,8 @@ def : ProcModel<P, SandyBridgeModel, [
17401758[
17411759 TuningMacroFusion,
17421760 TuningSlowUAMem16,
1743- TuningInsertVZEROUPPER
1761+ TuningInsertVZEROUPPER,
1762+ TuningAvoidMFENCE
17441763]>;
17451764}
17461765foreach P = ["penryn", "core_2_duo_sse4_1"] in {
@@ -1759,7 +1778,8 @@ def : ProcModel<P, SandyBridgeModel, [
17591778[
17601779 TuningMacroFusion,
17611780 TuningSlowUAMem16,
1762- TuningInsertVZEROUPPER
1781+ TuningInsertVZEROUPPER,
1782+ TuningAvoidMFENCE
17631783]>;
17641784}
17651785
0 commit comments