-
Notifications
You must be signed in to change notification settings - Fork 3.2k
[WIP] for eval sdk e2e test #44601
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[WIP] for eval sdk e2e test #44601
Changes from all commits
f3c7122
d7d3ac4
3ee48f1
0d8e382
fa0f5b4
c961851
6adf165
896585f
e6d52c5
62bd9f0
489346b
d6745d0
3a37911
197a2ac
490c516
3623d86
c47daf4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -32,6 +32,30 @@ | |||||
| # Centralizing here avoids magic strings sprinkled through schema/content generation code. | ||||||
| WRAPPER_KEY = "item" | ||||||
|
|
||||||
| # Keys that must remain at the top level (outside the wrapper) when we | ||||||
| # normalize flat JSONL rows into the canonical `item` structure. | ||||||
| _RESERVED_ROOT_KEYS: Set[str] = {"sample"} | ||||||
|
|
||||||
|
|
||||||
| def _normalize_row_for_item_wrapper(row: Dict[str, Any]) -> Dict[str, Any]: | ||||||
| """Ensure every row exposes an `item` object without losing reserved keys.""" | ||||||
|
|
||||||
| wrapper = row.get(WRAPPER_KEY) | ||||||
| if isinstance(wrapper, dict): | ||||||
| return row | ||||||
|
|
||||||
| normalized: Dict[str, Any] = {} | ||||||
| item_payload: Dict[str, Any] = {} | ||||||
|
|
||||||
| for key, value in row.items(): | ||||||
| if key in _RESERVED_ROOT_KEYS: | ||||||
| normalized[key] = value | ||||||
| elif key != WRAPPER_KEY: | ||||||
| item_payload[key] = value | ||||||
|
|
||||||
| normalized[WRAPPER_KEY] = item_payload | ||||||
| return normalized | ||||||
|
|
||||||
|
|
||||||
| class OAIEvalRunCreationInfo(TypedDict, total=True): | ||||||
| """Configuration for an evaluator""" | ||||||
|
|
@@ -146,7 +170,6 @@ def _begin_single_aoai_evaluation( | |||||
| that maps the user-supplied evaluators to the names of the graders as generated by the OAI service. | ||||||
| :rtype: Tuple[str, str, Dict[str, str]] | ||||||
| """ | ||||||
|
|
||||||
| # Format data for eval group creation | ||||||
| LOGGER.info(f"AOAI: Preparing evaluation for {len(graders)} grader(s): {list(graders.keys())}") | ||||||
| grader_name_list = [] | ||||||
|
|
@@ -637,7 +660,6 @@ def to_schema(node: Dict[str, Any]) -> Dict[str, Any]: | |||||
| required = [] | ||||||
| for name, child in children.items(): | ||||||
| props[name] = to_schema(child) | ||||||
| required.append(name) | ||||||
| return { | ||||||
| "type": "object", | ||||||
| "properties": props, | ||||||
|
|
@@ -785,25 +807,37 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str] | |||||
| :rtype: Dict[str, Any] | ||||||
| """ | ||||||
|
|
||||||
| def _convert_value_to_string(val: Any) -> str: | ||||||
| """Convert a value to string representation for AOAI evaluation.""" | ||||||
| def _convert_value(val: Any) -> Any: | ||||||
| """Convert to AOAI-friendly representation while preserving structure when useful.""" | ||||||
| if val is None: | ||||||
| return "" | ||||||
| elif isinstance(val, (str, int, float, bool)): | ||||||
| if isinstance(val, str): | ||||||
| return val | ||||||
| if isinstance(val, bool): | ||||||
| return val | ||||||
| # Align numerics with legacy text-only JSONL payloads by turning them into strings. | ||||||
| if isinstance(val, (int, float, list)): | ||||||
| return str(val) | ||||||
| else: | ||||||
| try: # Attempt to JSON serialize lists/dicts | ||||||
| return json.dumps(val, ensure_ascii=False) | ||||||
| except (TypeError, ValueError): | ||||||
| # Fallback for unserializable objects | ||||||
| return str(val) | ||||||
| if isinstance(val, (dict)): | ||||||
|
||||||
| if isinstance(val, (dict)): | |
| if isinstance(val, dict): |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| { | ||
| "my_violence": { | ||
| "column_mapping": { | ||
| "query": "${data.item.query}", | ||
| "response": "${data.item.response}", | ||
| "context": "${data.item.context}", | ||
| "ground_truth": "${data.item.ground_truth}" | ||
| }, | ||
| "_evaluator_definition": { | ||
| "type": "service", | ||
| "metrics": { | ||
| "violence": { | ||
| "type": "ordinal", | ||
| "desirable_direction": "decrease", | ||
| "min_value": 0.0, | ||
| "max_value": 7.0 | ||
| } | ||
| } | ||
| }, | ||
| "_evaluator_id": "azureml://registries/azureml/evaluators/builtin.violence/versions/2" | ||
| }, | ||
| "my_self_harm": { | ||
| "column_mapping": { | ||
| "query": "${data.item.query}", | ||
| "response": "${data.item.response}", | ||
| "context": "${data.item.context}", | ||
| "ground_truth": "${data.item.ground_truth}" | ||
| }, | ||
| "_evaluator_definition": { | ||
| "type": "service", | ||
| "metrics": { | ||
| "self_harm": { | ||
| "type": "ordinal", | ||
| "desirable_direction": "decrease", | ||
| "min_value": 0.0, | ||
| "max_value": 7.0 | ||
| } | ||
| } | ||
| }, | ||
| "_evaluator_id": "azureml://registries/azureml/evaluators/builtin.self_harm/versions/2" | ||
| }, | ||
| "ViolenceContentCustomEvaluator": { | ||
| "column_mapping": { | ||
| "query": "{{item.question}}", | ||
| "response": "{{item.answer}}", | ||
| "context": "{{item.context}}", | ||
| "ground_truth": "{{item.ground_truth}}" | ||
| }, | ||
| "_evaluator_definition": { | ||
| "type": "service", | ||
| "metrics": { | ||
| "ViolenceContentCustomEvaluator": { | ||
| "type": "boolean", | ||
| "desirable_direction": "decrease", | ||
| "min_value": 0.0, | ||
| "max_value": 7.0 | ||
| } | ||
| } | ||
| }, | ||
| "_evaluator_id": "azureml://registries/azureml/evaluators/builtin.ViolenceContentCustomEvaluator/versions/2" | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The CHANGELOG entry has a minor grammar issue: the phrase "handling of nested fields for AOAI graders when using files as datasource" is missing an article. It should read "when using files as a datasource" or "when using files as datasources".