Skip to content

Commit 55dc433

Browse files
authored
fix: Update Penguin tests to use renamed resource server (#1540)
Signed-off-by: Shashank Verma <[email protected]>
1 parent c32778d commit 55dc433

File tree

3 files changed: 36 additions (+36) and 36 deletions (-36).

3 files changed: 36 additions (+36) and 36 deletions (-36).

tests/unit/environments/penguin_test_data/test_penguin_sanity.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/unit/environments/test_penguin.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,18 +80,18 @@ def penguin_vllm_generation(cluster, penguin_tokenizer): # noqa: F811
8080
def penguin(penguin_vllm_generation):
8181
"""Create a Penguin actor for testing."""
8282

83-
yaml_str = r"""multineedle_resources_server:
83+
yaml_str = r"""example_multi_step_resources_server:
8484
resources_servers:
85-
multineedle:
85+
example_multi_step:
8686
entrypoint: app.py
8787
domain: instruction_following
88-
multineedle_simple_agent:
88+
example_multi_step_simple_agent:
8989
responses_api_agents:
9090
simple_agent:
9191
entrypoint: app.py
9292
resources_server:
9393
type: resources_servers
94-
name: multineedle_resources_server
94+
name: example_multi_step_resources_server
9595
model_server:
9696
type: responses_api_models
9797
name: openai_model

tests/unit/experience/test_rollouts.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -818,37 +818,37 @@ def test_run_async_penguin_rollout(
818818
"natural_termination_rate": None,
819819
"truncation_rate": None,
820820
# per agent metrics
821-
"multineedle_simple_agent/full_result": None,
822-
"multineedle_simple_agent/accuracy/histogram": None,
823-
"multineedle_simple_agent/accuracy/max": 0.0,
824-
"multineedle_simple_agent/accuracy/mean": 0.0,
825-
"multineedle_simple_agent/accuracy/median": 0.0,
826-
"multineedle_simple_agent/accuracy/min": 0.0,
827-
"multineedle_simple_agent/accuracy/stddev": 0.0,
828-
"multineedle_simple_agent/order_instruction_following_failure/histogram": None,
829-
"multineedle_simple_agent/order_instruction_following_failure/max": 0.0,
830-
"multineedle_simple_agent/order_instruction_following_failure/mean": 0.0,
831-
"multineedle_simple_agent/order_instruction_following_failure/median": 0.0,
832-
"multineedle_simple_agent/order_instruction_following_failure/min": 0.0,
833-
"multineedle_simple_agent/order_instruction_following_failure/stddev": 0.0,
834-
"multineedle_simple_agent/original_term_minefield_hit/histogram": None,
835-
"multineedle_simple_agent/original_term_minefield_hit/max": 0.0,
836-
"multineedle_simple_agent/original_term_minefield_hit/mean": 0.0,
837-
"multineedle_simple_agent/original_term_minefield_hit/median": 0.0,
838-
"multineedle_simple_agent/original_term_minefield_hit/min": 0.0,
839-
"multineedle_simple_agent/original_term_minefield_hit/stddev": 0.0,
840-
"multineedle_simple_agent/reward/histogram": None,
841-
"multineedle_simple_agent/reward/max": 0.0,
842-
"multineedle_simple_agent/reward/mean": 0.0,
843-
"multineedle_simple_agent/reward/median": 0.0,
844-
"multineedle_simple_agent/reward/min": 0.0,
845-
"multineedle_simple_agent/reward/stddev": 0.0,
846-
"multineedle_simple_agent/set_overlap/histogram": None,
847-
"multineedle_simple_agent/set_overlap/max": 0.0,
848-
"multineedle_simple_agent/set_overlap/mean": 0.0,
849-
"multineedle_simple_agent/set_overlap/median": 0.0,
850-
"multineedle_simple_agent/set_overlap/min": 0.0,
851-
"multineedle_simple_agent/set_overlap/stddev": 0.0,
821+
"example_multi_step_simple_agent/full_result": None,
822+
"example_multi_step_simple_agent/accuracy/histogram": None,
823+
"example_multi_step_simple_agent/accuracy/max": 0.0,
824+
"example_multi_step_simple_agent/accuracy/mean": 0.0,
825+
"example_multi_step_simple_agent/accuracy/median": 0.0,
826+
"example_multi_step_simple_agent/accuracy/min": 0.0,
827+
"example_multi_step_simple_agent/accuracy/stddev": 0.0,
828+
"example_multi_step_simple_agent/order_instruction_following_failure/histogram": None,
829+
"example_multi_step_simple_agent/order_instruction_following_failure/max": 0.0,
830+
"example_multi_step_simple_agent/order_instruction_following_failure/mean": 0.0,
831+
"example_multi_step_simple_agent/order_instruction_following_failure/median": 0.0,
832+
"example_multi_step_simple_agent/order_instruction_following_failure/min": 0.0,
833+
"example_multi_step_simple_agent/order_instruction_following_failure/stddev": 0.0,
834+
"example_multi_step_simple_agent/original_term_minefield_hit/histogram": None,
835+
"example_multi_step_simple_agent/original_term_minefield_hit/max": 0.0,
836+
"example_multi_step_simple_agent/original_term_minefield_hit/mean": 0.0,
837+
"example_multi_step_simple_agent/original_term_minefield_hit/median": 0.0,
838+
"example_multi_step_simple_agent/original_term_minefield_hit/min": 0.0,
839+
"example_multi_step_simple_agent/original_term_minefield_hit/stddev": 0.0,
840+
"example_multi_step_simple_agent/reward/histogram": None,
841+
"example_multi_step_simple_agent/reward/max": 0.0,
842+
"example_multi_step_simple_agent/reward/mean": 0.0,
843+
"example_multi_step_simple_agent/reward/median": 0.0,
844+
"example_multi_step_simple_agent/reward/min": 0.0,
845+
"example_multi_step_simple_agent/reward/stddev": 0.0,
846+
"example_multi_step_simple_agent/set_overlap/histogram": None,
847+
"example_multi_step_simple_agent/set_overlap/max": 0.0,
848+
"example_multi_step_simple_agent/set_overlap/mean": 0.0,
849+
"example_multi_step_simple_agent/set_overlap/median": 0.0,
850+
"example_multi_step_simple_agent/set_overlap/min": 0.0,
851+
"example_multi_step_simple_agent/set_overlap/stddev": 0.0,
852852
},
853853
}
854854

0 commit comments

Comments
 (0)