@@ -4687,3 +4687,145 @@ def _ccached_zai_glm_45():
46874687 },
46884688 }
46894689 )
4690+
4691+
def _ccached_microsoft_phi3_mini_128k_instruct():
    "microsoft/Phi-3-mini-128k-instruct"
    # NOTE(review): the docstring above is kept verbatim on purpose -- confirm
    # whether anything elsewhere reads ``__doc__`` as the model identifier
    # before rewording it.
    #
    # Returns a ``transformers.Phi3Config`` built entirely from the hard-coded
    # values below; nothing is read from disk or the network by this function
    # itself. The dictionary looks like a pasted ``config.json``; JSON's
    # ``true``/``false`` are spelled as Python ``True``/``False`` so the
    # function does not depend on module-level aliases for those names.
    return transformers.Phi3Config(
        **{
            "_name_or_path": "Phi-3-mini-128k-instruct",
            "architectures": ["Phi3ForCausalLM"],
            "attention_dropout": 0.0,
            "auto_map": {
                "AutoConfig": "configuration_phi3.Phi3Config",
                "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
            },
            "bos_token_id": 1,
            "embd_pdrop": 0.0,
            "eos_token_id": 32000,
            "hidden_act": "silu",
            "hidden_size": 3072,
            "initializer_range": 0.02,
            "intermediate_size": 8192,
            "max_position_embeddings": 131072,
            "model_type": "phi3",
            "num_attention_heads": 32,
            "num_hidden_layers": 32,
            "num_key_value_heads": 32,
            "original_max_position_embeddings": 4096,
            "pad_token_id": 32000,
            "resid_pdrop": 0.0,
            "rms_norm_eps": 1e-05,
            "rope_scaling": {
                # 48 entries; values are reproduced exactly as given.
                "long_factor": [
                    1.0700000524520874,
                    1.1200000047683716,
                    1.149999976158142,
                    1.4199999570846558,
                    1.5699999332427979,
                    1.7999999523162842,
                    2.129999876022339,
                    2.129999876022339,
                    3.009999990463257,
                    5.910000324249268,
                    6.950000286102295,
                    9.070000648498535,
                    9.930000305175781,
                    10.710000038146973,
                    11.130000114440918,
                    14.609999656677246,
                    15.409998893737793,
                    19.809999465942383,
                    37.279998779296875,
                    38.279998779296875,
                    38.599998474121094,
                    40.12000274658203,
                    46.20000457763672,
                    50.940006256103516,
                    53.66000747680664,
                    54.9373893737793,
                    56.89738845825195,
                    57.28738784790039,
                    59.98738479614258,
                    60.86738586425781,
                    60.887386322021484,
                    61.71739196777344,
                    62.91739273071289,
                    62.957393646240234,
                    63.41739273071289,
                    63.8173942565918,
                    63.83739471435547,
                    63.897396087646484,
                    63.93739700317383,
                    64.06739807128906,
                    64.11434936523438,
                    64.12435150146484,
                    64.15435028076172,
                    64.19435119628906,
                    64.24435424804688,
                    64.57435607910156,
                    64.69000244140625,
                    64.76000213623047,
                ],
                # 48 entries; values are reproduced exactly as given.
                "short_factor": [
                    1.1,
                    1.1,
                    1.1,
                    1.3000000000000003,
                    1.3500000000000003,
                    1.3500000000000003,
                    1.4000000000000004,
                    1.5500000000000005,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.000000000000001,
                    2.0500000000000007,
                    2.0500000000000007,
                    2.0500000000000007,
                    2.0500000000000007,
                    2.0500000000000007,
                    2.0500000000000007,
                    2.1000000000000005,
                    2.1000000000000005,
                    2.1500000000000004,
                    2.25,
                    2.25,
                    2.25,
                    2.25,
                    2.25,
                    2.3999999999999995,
                    2.4499999999999993,
                    2.499999999999999,
                    2.6999999999999984,
                    2.6999999999999984,
                    2.7499999999999982,
                    2.799999999999998,
                    2.8999999999999977,
                    3.049999999999997,
                ],
                "type": "longrope",
            },
            "rope_theta": 10000.0,
            "sliding_window": 262144,
            # Python booleans, not the JSON spellings ``false``/``true``.
            "tie_word_embeddings": False,
            "torch_dtype": "bfloat16",
            "transformers_version": "4.40.2",
            "use_cache": True,
            "attention_bias": False,
            "vocab_size": 32064,
        }
    )
0 commit comments