@@ -117,7 +117,9 @@ TARGET_BUILTIN(__builtin_ia32_uwrmsr, "vULLiULLi", "n", "usermsr")
117
117
// AMX internal builtin
118
118
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig_internal, " vvC*" , " n" , " amx-tile" )
119
119
TARGET_BUILTIN(__builtin_ia32_tileloadd64_internal, " V256iUsUsvC*z" , " n" , " amx-tile" )
120
+ TARGET_BUILTIN(__builtin_ia32_tileloaddrs64_internal, " V256iUsUsvC*z" , " n" , " amx-movrs" )
120
121
TARGET_BUILTIN(__builtin_ia32_tileloaddt164_internal, " V256iUsUsvC*z" , " n" , " amx-tile" )
122
+ TARGET_BUILTIN(__builtin_ia32_tileloaddrst164_internal, " V256iUsUsvC*z" , " n" , " amx-movrs" )
121
123
TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, " V256iUsUsUsV256iV256iV256i" , " n" , " amx-int8" )
122
124
TARGET_BUILTIN(__builtin_ia32_tdpbsud_internal, " V256iUsUsUsV256iV256iV256i" , " n" , " amx-int8" )
123
125
TARGET_BUILTIN(__builtin_ia32_tdpbusd_internal, " V256iUsUsUsV256iV256iV256i" , " n" , " amx-int8" )
@@ -129,10 +131,15 @@ TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i",
129
131
TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, " V256iUsUsUsV256iV256iV256i" , " n" , " amx-complex" )
130
132
TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, " V256iUsUsUsV256iV256iV256i" , " n" , " amx-complex" )
131
133
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-transpose" )
134
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-movrs,amx-transpose" )
132
135
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-transpose" )
136
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-movrs,amx-transpose" )
133
137
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-transpose" )
138
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-movrs,amx-transpose" )
134
139
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-transpose" )
140
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1_internal, " vUsUsUsV256i*V256i*vC*z" , " n" , " amx-movrs,amx-transpose" )
135
141
TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, " V256iUsUsV256i" , " n" , " amx-transpose" )
142
+
136
143
TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, " V16fUsUsV256iUi" , " n" , " amx-avx512,avx10.2-512" )
137
144
TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, " V32yUsUsV256iUi" , " n" , " amx-avx512,avx10.2-512" )
138
145
TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, " V32yUsUsV256iUi" , " n" , " amx-avx512,avx10.2-512" )
@@ -147,6 +154,13 @@ TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
147
154
TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, " vvC*" , " n" , " amx-tile" )
148
155
TARGET_BUILTIN(__builtin_ia32_tilerelease, " v" , " n" , " amx-tile" )
149
156
TARGET_BUILTIN(__builtin_ia32_tilezero, " vUc" , " n" , " amx-tile" )
157
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs, " vIUcvC*z" , " n" , " amx-movrs,amx-transpose" )
158
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1, " vIUcvC*z" , " n" , " amx-movrs,amx-transpose" )
159
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs, " vIUcvC*z" , " n" , " amx-movrs,amx-transpose" )
160
+ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1, " vIUcvC*z" , " n" , " amx-movrs,amx-transpose" )
161
+
162
+ TARGET_BUILTIN(__builtin_ia32_tileloaddrs64, " vIUcvC*z" , " n" , " amx-movrs" )
163
+ TARGET_BUILTIN(__builtin_ia32_tileloaddrst164, " vIUcvC*z" , " n" , " amx-movrs" )
150
164
151
165
TARGET_BUILTIN(__builtin_ia32_tileloadd64, " vIUcvC*z" , " n" , " amx-tile" )
152
166
TARGET_BUILTIN(__builtin_ia32_tileloaddt164, " vIUcvC*z" , " n" , " amx-tile" )
0 commit comments