@@ -4160,3 +4160,145 @@ def _ccached_ydshieh_tiny_random_vit_for_image_classification():
41604160 "transformers_version" : "4.24.0.dev0" ,
41614161 }
41624162 )
4163+
4164+
def _ccached_microsoft_phi_35_mini_instruct():
    """Return a hard-coded ``transformers.Phi3Config`` for "microsoft/Phi-3.5-mini-instruct".

    The configuration is built entirely from the literal values below; nothing
    is read from disk or downloaded by this function itself.

    NOTE(review): the values presumably mirror the model's published
    ``config.json`` -- confirm against the upstream file when updating.

    Returns:
        A ``transformers.Phi3Config`` constructed from the keyword arguments
        listed in the dictionary below.
    """
    return transformers.Phi3Config(
        **{
            "_name_or_path": "Phi-3.5-mini-instruct",
            "architectures": ["Phi3ForCausalLM"],
            "attention_dropout": 0.0,
            "auto_map": {
                "AutoConfig": "configuration_phi3.Phi3Config",
                "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
            },
            "bos_token_id": 1,
            "embd_pdrop": 0.0,
            "eos_token_id": 32000,
            "hidden_act": "silu",
            "hidden_size": 3072,
            "initializer_range": 0.02,
            "intermediate_size": 8192,
            "max_position_embeddings": 131072,
            "model_type": "phi3",
            "num_attention_heads": 32,
            "num_hidden_layers": 32,
            "num_key_value_heads": 32,
            "original_max_position_embeddings": 4096,
            "pad_token_id": 32000,
            "resid_pdrop": 0.0,
            "rms_norm_eps": 1e-05,
            "rope_scaling": {
                # 48 entries each, i.e. hidden_size // num_attention_heads // 2.
                "long_factor": [
                    1.0800000429153442,
                    1.1100000143051147,
                    1.1399999856948853,
                    1.340000033378601,
                    1.5899999141693115,
                    1.600000023841858,
                    1.6200000047683716,
                    2.620000123977661,
                    3.2300000190734863,
                    3.2300000190734863,
                    4.789999961853027,
                    7.400000095367432,
                    7.700000286102295,
                    9.09000015258789,
                    12.199999809265137,
                    17.670000076293945,
                    24.46000099182129,
                    28.57000160217285,
                    30.420001983642578,
                    30.840002059936523,
                    32.590003967285156,
                    32.93000411987305,
                    42.320003509521484,
                    44.96000289916992,
                    50.340003967285156,
                    50.45000457763672,
                    57.55000305175781,
                    57.93000411987305,
                    58.21000289916992,
                    60.1400032043457,
                    62.61000442504883,
                    62.62000274658203,
                    62.71000289916992,
                    63.1400032043457,
                    63.1400032043457,
                    63.77000427246094,
                    63.93000411987305,
                    63.96000289916992,
                    63.970001220703125,
                    64.02999877929688,
                    64.06999969482422,
                    64.08000183105469,
                    64.12000274658203,
                    64.41000366210938,
                    64.4800033569336,
                    64.51000213623047,
                    64.52999877929688,
                    64.83999633789062,
                ],
                "short_factor": [
                    1.0,
                    1.0199999809265137,
                    1.0299999713897705,
                    1.0299999713897705,
                    1.0499999523162842,
                    1.0499999523162842,
                    1.0499999523162842,
                    1.0499999523162842,
                    1.0499999523162842,
                    1.0699999332427979,
                    1.0999999046325684,
                    1.1099998950958252,
                    1.1599998474121094,
                    1.1599998474121094,
                    1.1699998378753662,
                    1.2899998426437378,
                    1.339999794960022,
                    1.679999828338623,
                    1.7899998426437378,
                    1.8199998140335083,
                    1.8499997854232788,
                    1.8799997568130493,
                    1.9099997282028198,
                    1.9399996995925903,
                    1.9899996519088745,
                    2.0199997425079346,
                    2.0199997425079346,
                    2.0199997425079346,
                    2.0199997425079346,
                    2.0199997425079346,
                    2.0199997425079346,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0299997329711914,
                    2.0799996852874756,
                    2.0899996757507324,
                    2.189999580383301,
                    2.2199995517730713,
                    2.5899994373321533,
                    2.729999542236328,
                    2.749999523162842,
                    2.8399994373321533,
                ],
                "type": "longrope",
            },
            "rope_theta": 10000.0,
            "sliding_window": 262144,
            # Booleans must be the Python constants True/False; the JSON
            # spellings `true`/`false` are undefined names in Python and
            # would raise NameError when this function is called.
            "tie_word_embeddings": False,
            "torch_dtype": "bfloat16",
            "transformers_version": "4.43.3",
            "use_cache": True,
            "attention_bias": False,
            "vocab_size": 32064,
        }
    )
0 commit comments