Example with gpt-4o (set the environment variable OPENAI_API_KEY to your API key):
# Run lm_eval once per Irish BLiMP "*_generate_until" task against gpt-4o via
# the openai-chat-completions backend. Each run is zero-shot
# (--num_fewshot 0), applies the chat template, and logs samples under
# output/temp.
for t in \
  irish_blimp_1_verbal_morphology_generate_until \
  irish_blimp_2_nouns_cases_generate_until \
  irish_blimp_3_adjectives_comparison_generate_until \
  irish_blimp_4_pronouns_generate_until \
  irish_blimp_5_articles_determiners_generate_until \
  irish_blimp_6_numbers_generate_until \
  irish_blimp_7_copula_bí_generate_until \
  irish_blimp_8_clause_structure_word_order_generate_until \
  irish_blimp_9_questions_negations_generate_until \
  irish_blimp_10_discourse_sentence_types_generate_until \
  irish_blimp_11_adverbs_modifiers_generate_until; do
  # Quote the task name so it is always passed as a single argument.
  lm_eval --model openai-chat-completions \
    --model_args model=gpt-4o \
    --tasks "${t}" \
    --num_fewshot 0 \
    --output_path output/temp \
    --log_samples \
    --apply_chat_template
done

Example with openai/gpt-oss-20b:
# Run lm_eval once per Irish BLiMP task against openai/gpt-oss-20b through the
# vllm backend. Each run is zero-shot (--num_fewshot 0), applies the chat
# template, and logs samples under output/temp.
# NOTE(review): tensor_parallel_size=2 presumably assumes two GPUs are
# available; adjust the --model_args values to match your hardware.
for t in \
  irish_blimp_1_verbal_morphology \
  irish_blimp_2_nouns_cases \
  irish_blimp_3_adjectives_comparison \
  irish_blimp_4_pronouns \
  irish_blimp_5_articles_determiners \
  irish_blimp_6_numbers \
  irish_blimp_7_copula_bí \
  irish_blimp_8_clause_structure_word_order \
  irish_blimp_9_questions_negations \
  irish_blimp_10_discourse_sentence_types \
  irish_blimp_11_adverbs_modifiers; do
  # Quote the task name so it is always passed as a single argument.
  lm_eval --model vllm \
    --model_args pretrained=openai/gpt-oss-20b,tensor_parallel_size=2,gpu_memory_utilization=0.95,max_model_len=4096 \
    --tasks "${t}" \
    --num_fewshot 0 \
    --output_path output/temp \
    --log_samples \
    --apply_chat_template
done