Skip to content

Commit dabb9dc

Browse files
committed
getting old samples to work after updates
1 parent 687ce79 commit dabb9dc

File tree

3 files changed

+110
-117
lines changed

3 files changed

+110
-117
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
"promptpex:test-st-def:ollama": "genaiscript run promptpex \"samples/speech-tag/speech-tag.prompty\" --vars \"effort=min\" --vars \"out=evals/test-st-def\" --env .env.ollama",
9696
"promptpex:test-st-mingt": "genaiscript run promptpex \"samples/speech-tag/speech-tag.prompty\" --vars \"effort=min\" --vars \"groundtruthModel=azure:gpt-4.1-mini_2025-04-14\" --vars \"evalModel=ollama:llama3.3\" --vars \"evals=true\" --vars \"compliance=true\" --vars \"baselineTests=false\" --vars \"modelsUnderTest=ollama:llama3.3\" --vars \"out=evals/test-st-mingt\"",
9797
"promptpex:test-st-mediumgt": "genaiscript run promptpex \"samples/speech-tag/speech-tag-multi.prompty\" --vars \"effort=medium\" --vars \"groundtruthModel=azure:gpt-4.1-mini_2025-04-14\" --vars \"evalModel=ollama:llama3.3\" --vars \"evals=true\" --vars \"compliance=true\" --vars \"modelsUnderTest=azure:gpt-4.1-mini_2025-04-14;ollama:llama3.3\" --vars \"out=evals/test-st-mediumgt\"",
98+
"promptpex:test-st-min:ollama": "genaiscript run promptpex \"samples/speech-tag/speech-tag.prompty\" --vars \"effort=min\" --vars \"groundtruthModel=azure:gpt-4.1-mini_2025-04-14\" --vars \"evals=true\" --vars \"modelsUnderTest=ollama:qwen2.5:3b;ollama:llama3.2:1b;ollama:llama3.3\" --vars \"compliance=false\" --vars \"baselineTests=false\" --vars \"evalModelGroundtruth=azure:gpt-4.1-mini_2025-04-14;ollama:llama3.3\" --vars \"out=evals/test-st-min\" --env .env.ollama",
9899
"promptpex:test-st-min-gen:ollama": "genaiscript run promptpex \"samples/speech-tag/speech-tag.prompty\" --vars \"effort=min\" --vars \"groundtruthModel=azure:gpt-4.1-mini_2025-04-14\" --vars \"evals=false\" --vars \"compliance=false\" --vars \"baselineTests=false\" --vars \"evalModelGroundtruth=azure:gpt-4.1-mini_2025-04-14;ollama:llama3.3\" --vars \"out=evals/test-st-min-gen\" --env .env.ollama",
99100
"promptpex:test-st-min-run:ollama": "genaiscript run promptpex \"evals/test-st-min-gen/speech-tag/promptpex_context.json\" --vars \"evals=false\" --vars \"compliance=false\" --vars \"baselineTests=false\" --vars \"modelsUnderTest=ollama:qwen2.5:3b;ollama:llama3.2:1b;ollama:llama3.3\" --vars \"out=evals/test-st-min-run\" --env .env.ollama",
100101
"promptpex:test-st-min-eval:ollama": "genaiscript run promptpex \"evals/test-st-min-run/speech-tag/promptpex_context.json\" --vars \"evals=true\" --vars \"compliance=true\" --vars \"baselineTests=false\" --vars \"evalModel=azure:gpt-4.1-mini_2025-04-14\" --vars \"out=evals/test-st-min-eval\" --env .env.ollama",

samples/analyze-metrics.ipynb

Lines changed: 103 additions & 116 deletions
Large diffs are not rendered by default.

samples/run-samples.zx.mjs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,14 @@ const outDir = `evals/test-all-${dateStr}`;
2929
for (const prompty of promptyFilesAll) {
3030
const promptyFileBase = path.basename(prompty, path.extname(prompty));
3131

32-
await $`npm run promptpex ${prompty} -- --vars \"effort=medium\" --vars \"evals=true\" --vars \"compliance=true\" --vars \"baselineTests=false\" --vars \"modelsUnderTest=azure:gpt-4o-mini_2024-07-18;ollama:gemma2:9b;ollama:qwen2.5:3b;ollama:llama3.2:1b\" --vars "out=${outDir}/${promptyFileBase}"`;
32+
await $`npm run promptpex ${prompty} -- --vars \"effort=min\" --vars \"groundtruthModel=ollama:llama3.3\" --vars \"evals=true\" --vars \"modelsUnderTest=ollama:qwen2.5:3b;ollama:llama3.2:1b;ollama:llama3.3\" --vars \"compliance=false\" --vars \"baselineTests=false\" --vars \"evalModelGroundtruth=ollama:llama3.3\" --env .env.ollama --vars \"out=${outDir}/${promptyFileBase}\"`;
3333
}
3434

35+
// uses GPT-4 models from TRAPI
36+
// await $`npm run promptpex ${prompty} -- --vars \"effort=min\" --vars \"groundtruthModel=azure:gpt-4o_2024-11-20\" --vars \"evals=true\" --vars \"modelsUnderTest=ollama:qwen2.5:3b;ollama:llama3.2:1b;ollama:llama3.3\" --vars \"compliance=false\" --vars \"baselineTests=false\" --vars \"evalModelGroundtruth=azure:gpt-4o_2024-11-20;ollama:llama3.3\" --env .env.ollama --vars \"out=${outDir}/${promptyFileBase}\"`;
37+
38+
// await $`npm run promptpex ${prompty} -- --vars \"effort=medium\" --vars \"evals=true\" --vars \"compliance=true\" --vars \"baselineTests=false\" --vars \"modelsUnderTest=azure:gpt-4o-mini_2024-07-18;ollama:gemma2:9b;ollama:qwen2.5:3b;ollama:llama3.2:1b\" --vars "out=${outDir}/${promptyFileBase}"`;
39+
3540
// await $`npm run promptpex ${prompty} -- --vars "splitRules=true" --vars "maxRulesPerTestGeneration=5" --vars "testGenerations=1" --vars "evals=true" --vars "testExpansions=0" --vars "compliance=true" --vars baselineTests=false --vars "modelsUnderTest=azure:gpt-4o-mini_2024-07-18;ollama:gemma2:9b;ollama:qwen2.5:3b;ollama:llama3.2:1b" --vars "out=${outDir}/${promptyFileBase}"`;
3641

3742
// await $`npm run promptpex ${prompty} -- --vars "splitRules=true" --vars "maxRulesPerTestGeneration=5" --vars "testGenerations=1" --vars "evals=true" --vars "testExpansions=0" --vars "compliance=true" --vars "baselineTests=false" --vars "modelsUnderTest=ollama:llama3.2:1b" --vars "out=${outDir}/${promptyFileBase}"`;

0 commit comments

Comments
 (0)