|
274 | 274 | ] |
275 | 275 | } |
276 | 276 | }, |
277 | | - { |
278 | | - "model_name": "hpcai-tech/grok-1", |
279 | | - "model_type": null, |
280 | | - "additional_params": { |
281 | | - "max_position_embeddings": 128, |
282 | | - "num_hidden_layers": 1, |
283 | | - "num_attention_heads": 2, |
284 | | - "hidden_size": 64, |
285 | | - "intermediate_size": 256, |
286 | | - "vocab_size": 131072, |
287 | | - "num_key_value_heads": 1 |
288 | | - } |
289 | | - }, |
290 | 277 | { |
291 | 278 | "model_name": "neuralmagic/Llama-3.2-3B-Instruct-FP8", |
292 | 279 | "model_type": "llama", |
|
374 | 361 | } |
375 | 362 | }, |
376 | 363 | { |
377 | | - "model_name": "unsloth/gemma-2b", |
378 | | - "model_type": "gemma", |
379 | | - "additional_params": { |
380 | | - "max_position_embeddings": 128, |
381 | | - "num_hidden_layers": 1, |
382 | | - "num_attention_heads": 2, |
383 | | - "hidden_size": 64, |
384 | | - "intermediate_size": 256, |
385 | | - "vocab_size": 256000, |
386 | | - "num_key_value_heads": 1 |
387 | | - } |
388 | | - }, |
389 | | - { |
390 | | - "model_name": "unsloth/gemma-2-2b", |
| 364 | + "model_name": "hf-internal-testing/tiny-random-Gemma2ForCausalLM", |
391 | 365 | "model_type": "gemma2", |
392 | 366 | "additional_params": { |
393 | 367 | "max_position_embeddings": 128, |
|
400 | 374 | } |
401 | 375 | }, |
402 | 376 | { |
403 | | - "model_name": "ibm-granite/granite-20b-code-base", |
| 377 | + "model_name": "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM", |
404 | 378 | "model_type": "gpt_bigcode", |
405 | 379 | "additional_params": { |
406 | 380 | "max_position_embeddings": 128, |
|
430 | 404 | } |
431 | 405 | }, |
432 | 406 | { |
433 | | - "model_name": "mistralai/Mixtral-8x7B-Instruct-v0.1", |
| 407 | + "model_name": "hf-internal-testing/tiny-random-MixtralForCausalLM", |
434 | 408 | "model_type": "mixtral", |
435 | 409 | "additional_params": { |
436 | 410 | "max_position_embeddings": 128, |
|
443 | 417 | } |
444 | 418 | }, |
445 | 419 | { |
446 | | - "model_name": "meta-llama/Llama-3.2-1B", |
| 420 | + "model_name": "hf-internal-testing/tiny-random-LlamaForCausalLM", |
447 | 421 | "model_type": "llama", |
448 | 422 | "additional_params": { |
449 | 423 | "max_position_embeddings": 128, |
|
476 | 450 | } |
477 | 451 | }, |
478 | 452 | { |
479 | | - "model_name": "ibm-granite/granite-3.1-2b-instruct", |
| 453 | + "model_name": "hf-internal-testing/tiny-random-GraniteForCausalLM", |
480 | 454 | "model_type": "granite", |
481 | 455 | "additional_params": { |
482 | 456 | "max_position_embeddings": 128, |
|
0 commit comments