|
309 | 309 | { |
310 | 310 | "name": "llama_3_2_1b_instruct_accuracy", |
311 | 311 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 tqdm transformers==4.57.1", |
312 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_llama_3_2_1b_accuracy" |
| 312 | + "pytest": "benchmark/tt-xla/test_llms.py::test_llama_3_2_1b", |
| 313 | + "accuracy-testing": true |
313 | 314 | }, |
314 | 315 | { |
315 | 316 | "name": "llama_3_2_3b_instruct_accuracy", |
316 | 317 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 tqdm transformers==4.57.1", |
317 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_llama_3_2_3b_accuracy" |
| 318 | + "pytest": "benchmark/tt-xla/test_llms.py::test_llama_3_2_3b", |
| 319 | + "accuracy-testing": true |
318 | 320 | }, |
319 | 321 | { |
320 | 322 | "name": "llama_3_1_8b_instruct_accuracy", |
321 | 323 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 tqdm transformers==4.57.1", |
322 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_llama_3_1_8b_accuracy" |
| 324 | + "pytest": "benchmark/tt-xla/test_llms.py::test_llama_3_1_8b", |
| 325 | + "accuracy-testing": true |
323 | 326 | }, |
324 | 327 | { |
325 | 328 | "name": "mistral_7b_accuracy", |
326 | 329 | "pyreq": "datasets loguru pytest requests torch==2.9.0 tqdm transformers==4.57.1 protobuf sentencepiece", |
327 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_mistral_7b_accuracy" |
| 330 | + "pytest": "benchmark/tt-xla/test_llms.py::test_mistral_7b", |
| 331 | + "accuracy-testing": true |
328 | 332 | }, |
329 | 333 | { |
330 | 334 | "name": "qwen_2_5_7b_instruct_accuracy", |
331 | 335 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
332 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_7b_accuracy" |
| 336 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_7b", |
| 337 | + "accuracy-testing": true |
333 | 338 | }, |
334 | 339 | { |
335 | 340 | "name": "google_gemma-1.1-2b-it_accuracy", |
336 | 341 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 tqdm transformers==4.57.1", |
337 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_gemma_1_1_2b_accuracy" |
| 342 | + "pytest": "benchmark/tt-xla/test_llms.py::test_gemma_1_1_2b", |
| 343 | + "accuracy-testing": true |
338 | 344 | }, |
339 | 345 | { |
340 | 346 | "name": "google_gemma-2-2b-it_accuracy", |
341 | 347 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 tqdm transformers==4.57.1", |
342 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_gemma_2_2b_accuracy" |
| 348 | + "pytest": "benchmark/tt-xla/test_llms.py::test_gemma_2_2b", |
| 349 | + "accuracy-testing": true |
343 | 350 | }, |
344 | 351 | { |
345 | 352 | "name": "microsoft_phi-1_accuracy", |
346 | 353 | "pyreq": "datasets loguru pytest requests torch==2.9.0 tqdm transformers==4.57.1", |
347 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_phi1_accuracy" |
| 354 | + "pytest": "benchmark/tt-xla/test_llms.py::test_phi1", |
| 355 | + "accuracy-testing": true |
348 | 356 | }, |
349 | 357 | { |
350 | 358 | "name": "microsoft_phi-1_5_accuracy", |
351 | 359 | "pyreq": "datasets loguru pytest requests torch==2.9.0 tqdm transformers==4.57.1", |
352 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_phi1_5_accuracy" |
| 360 | + "pytest": "benchmark/tt-xla/test_llms.py::test_phi1_5", |
| 361 | + "accuracy-testing": true |
353 | 362 | }, |
354 | 363 | { |
355 | 364 | "name": "microsoft_phi-2_accuracy", |
356 | 365 | "pyreq": "datasets loguru pytest requests torch==2.9.0 tqdm transformers==4.57.1", |
357 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_phi2_accuracy" |
| 366 | + "pytest": "benchmark/tt-xla/test_llms.py::test_phi2", |
| 367 | + "accuracy-testing": true |
358 | 368 | }, |
359 | 369 | { |
360 | 370 | "name": "tiiuae_falcon3-1b-base_accuracy", |
361 | 371 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
362 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_falcon3_1b_accuracy" |
| 372 | + "pytest": "benchmark/tt-xla/test_llms.py::test_falcon3_1b", |
| 373 | + "accuracy-testing": true |
363 | 374 | }, |
364 | 375 | { |
365 | 376 | "name": "tiiuae_falcon3-3b-base_accuracy", |
366 | 377 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
367 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_falcon3_3b_accuracy" |
| 378 | + "pytest": "benchmark/tt-xla/test_llms.py::test_falcon3_3b", |
| 379 | + "accuracy-testing": true |
368 | 380 | }, |
369 | 381 | { |
370 | 382 | "name": "tiiuae_falcon3-7b-base_accuracy", |
371 | 383 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
372 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_falcon3_7b_accuracy" |
| 384 | + "pytest": "benchmark/tt-xla/test_llms.py::test_falcon3_7b", |
| 385 | + "accuracy-testing": true |
373 | 386 | }, |
374 | 387 | { |
375 | 388 | "name": "qwen_2_5_0_5b_instruct_accuracy", |
376 | 389 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
377 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_0_5b_accuracy" |
| 390 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_0_5b", |
| 391 | + "accuracy-testing": true |
378 | 392 | }, |
379 | 393 | { |
380 | 394 | "name": "qwen_2_5_1_5b_instruct_accuracy", |
381 | 395 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
382 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_1_5b_accuracy" |
| 396 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_1_5b", |
| 397 | + "accuracy-testing": true |
383 | 398 | }, |
384 | 399 | { |
385 | 400 | "name": "qwen_2_5_3b_instruct_accuracy", |
386 | 401 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
387 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_3b_accuracy" |
| 402 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_2_5_3b", |
| 403 | + "accuracy-testing": true |
388 | 404 | }, |
389 | 405 | { |
390 | 406 | "name": "qwen_3_0_6b_accuracy", |
391 | 407 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
392 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_0_6b_accuracy" |
| 408 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_0_6b", |
| 409 | + "accuracy-testing": true |
393 | 410 | }, |
394 | 411 | { |
395 | 412 | "name": "qwen_3_1_7b_accuracy", |
396 | 413 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
397 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_1_7b_accuracy" |
| 414 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_1_7b", |
| 415 | + "accuracy-testing": true |
398 | 416 | }, |
399 | 417 | { |
400 | 418 | "name": "qwen_3_4b_accuracy", |
401 | 419 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
402 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_4b_accuracy" |
| 420 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_4b", |
| 421 | + "accuracy-testing": true |
403 | 422 | }, |
404 | 423 | { |
405 | 424 | "name": "qwen_3_8b_accuracy", |
406 | 425 | "pyreq": "datasets loguru pytest requests tabulate timm torch==2.9.0 torchvision==0.24.0 tqdm transformers==4.57.1", |
407 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_8b_accuracy" |
| 426 | + "pytest": "benchmark/tt-xla/test_llms.py::test_qwen_3_8b", |
| 427 | + "accuracy-testing": true |
408 | 428 | }, |
409 | 429 | { |
410 | 430 | "name": "ministral_8b_accuracy", |
411 | 431 | "pyreq": "datasets loguru pytest requests torch==2.9.0 tqdm transformers==4.57.1", |
412 | | - "pytest": "benchmark/tt-xla/test_llms.py::test_ministral_8b_accuracy" |
| 432 | + "pytest": "benchmark/tt-xla/test_llms.py::test_ministral_8b", |
| 433 | + "accuracy-testing": true |
413 | 434 | } |
414 | 435 | ] |
415 | 436 | } |
|
0 commit comments