Skip to content

Commit f508c93

Browse files
authored
Merge branch 'main' into bryant/update-voice-fallback
2 parents dff84ac + 70823a2 commit f508c93

File tree

2 files changed

+107
-64
lines changed

2 files changed

+107
-64
lines changed

fern/apis/api/openapi.json

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6434,6 +6434,7 @@
64346434
"type": "string",
64356435
"description": "This is the OpenAI model that will be used.",
64366436
"enum": [
6437+
"chatgpt-4o-latest",
64376438
"o3-mini",
64386439
"o1-preview",
64396440
"o1-preview-2024-09-12",
@@ -6466,6 +6467,7 @@
64666467
"type": "array",
64676468
"description": "These are the fallback models that will be used if the primary model fails. This shouldn't be specified unless you have a specific reason to do so. Vapi will automatically find the fastest fallbacks that make sense.",
64686469
"enum": [
6470+
"chatgpt-4o-latest",
64696471
"o3-mini",
64706472
"o1-preview",
64716473
"o1-preview-2024-09-12",
@@ -6500,6 +6502,7 @@
65006502
"items": {
65016503
"type": "string",
65026504
"enum": [
6505+
"chatgpt-4o-latest",
65036506
"o3-mini",
65046507
"o1-preview",
65056508
"o1-preview-2024-09-12",
@@ -7618,6 +7621,50 @@
76187621
"voiceId"
76197622
]
76207623
},
7624+
"CartesiaExperimentalControls": {
7625+
"type": "object",
7626+
"properties": {
7627+
"speed": {
7628+
"type": "string",
7629+
"enum": [
7630+
"slowest",
7631+
"slow",
7632+
"normal",
7633+
"fast",
7634+
"fastest"
7635+
],
7636+
"example": "normal"
7637+
},
7638+
"emotion": {
7639+
"type": "string",
7640+
"enum": [
7641+
"anger:lowest",
7642+
"anger:low",
7643+
"anger:high",
7644+
"anger:highest",
7645+
"positivity:lowest",
7646+
"positivity:low",
7647+
"positivity:high",
7648+
"positivity:highest",
7649+
"surprise:lowest",
7650+
"surprise:low",
7651+
"surprise:high",
7652+
"surprise:highest",
7653+
"sadness:lowest",
7654+
"sadness:low",
7655+
"sadness:high",
7656+
"sadness:highest",
7657+
"curiosity:lowest",
7658+
"curiosity:low",
7659+
"curiosity:high",
7660+
"curiosity:highest"
7661+
],
7662+
"example": [
7663+
"happiness:high"
7664+
]
7665+
}
7666+
}
7667+
},
76217668
"CartesiaVoice": {
76227669
"type": "object",
76237670
"properties": {
@@ -7628,6 +7675,10 @@
76287675
"cartesia"
76297676
]
76307677
},
7678+
"voiceId": {
7679+
"type": "string",
7680+
"description": "The ID of the particular voice you want to use."
7681+
},
76317682
"model": {
76327683
"type": "string",
76337684
"description": "This is the model that will be used. This is optional and will default to the correct model for the voiceId.",
@@ -7661,6 +7712,14 @@
76617712
],
76627713
"example": "en"
76637714
},
7715+
"experimentalControls": {
7716+
"description": "Experimental controls for Cartesia voice generation",
7717+
"allOf": [
7718+
{
7719+
"$ref": "#/components/schemas/CartesiaExperimentalControls"
7720+
}
7721+
]
7722+
},
76647723
"chunkPlan": {
76657724
"description": "This is the plan for chunking the model output before it is sent to the voice provider.",
76667725
"allOf": [
@@ -7669,10 +7728,6 @@
76697728
}
76707729
]
76717730
},
7672-
"voiceId": {
7673-
"type": "string",
7674-
"description": "This is the provider-specific ID that will be used."
7675-
},
76767731
"fallbackPlan": {
76777732
"description": "This is the plan for voice provider fallbacks in the event that the primary voice provider fails.",
76787733
"allOf": [
@@ -8657,6 +8712,10 @@
86578712
"cartesia"
86588713
]
86598714
},
8715+
"voiceId": {
8716+
"type": "string",
8717+
"description": "The ID of the particular voice you want to use."
8718+
},
86608719
"model": {
86618720
"type": "string",
86628721
"description": "This is the model that will be used. This is optional and will default to the correct model for the voiceId.",
@@ -8690,9 +8749,13 @@
86908749
],
86918750
"example": "en"
86928751
},
8693-
"voiceId": {
8694-
"type": "string",
8695-
"description": "This is the provider-specific ID that will be used."
8752+
"experimentalControls": {
8753+
"description": "Experimental controls for Cartesia voice generation",
8754+
"allOf": [
8755+
{
8756+
"$ref": "#/components/schemas/CartesiaExperimentalControls"
8757+
}
8758+
]
86968759
},
86978760
"chunkPlan": {
86988761
"description": "This is the plan for chunking the model output before it is sent to the voice provider.",

fern/test/voice-testing.mdx

Lines changed: 37 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,36 @@ slug: /test/voice-testing
66

77
## Overview
88

9-
Voice Testing is an end-to-end feature that automates testing of your AI voice agents. Our platform simulates a call from an AI tester that interacts with your voice agent by following a pre-defined call script. After the call, the transcript is sent to a language model (LLM) along with your success criteria. The LLM then determines if the call met the defined objectives.
9+
Voice Testing is an end-to-end feature that automates testing of your AI voice agents. Our platform simulates a call from an AI tester that interacts with your voice agent by following a pre-defined call script. After the call, the transcript is sent to a language model (LLM) along with your evaluation rubric. The LLM then determines if the call met the defined objectives.
1010

1111
## Creating a Test Suite
1212

1313
Begin by creating a Test Suite that organizes and executes multiple test cases.
1414

1515
<Steps>
1616
### Step 1: Create a New Test Suite
17-
- Navigate to the **Test** tab in your dashboard and select **Voice Testing**.
17+
- Navigate to the **Test** tab in your dashboard and select **Test Suites**.
1818
- Click the **Create Test Suite** button.
1919

2020
### Step 2: Define Test Suite Details
2121
- Enter a title for your Test Suite.
2222
- Select a phone number from your organization using the dropdown.
23+
- Make sure the phone number has an assistant assigned to it (if not, navigate to Phone Numbers tab to complete that action).
2324

2425
### Step 3: Add Test Cases
2526
- Once your Test Suite is created, you will see a table where you can add test cases.
26-
- Click **Add Test Case** to add a new test case (up to 50 can be added).
27+
- Click **Add Test** to add a new test case (up to 50 can be added).
2728

2829
### Step 4: Configure Each Test Case
29-
- **Caller Behavior:** Define how the testing agent should behave, including a detailed multi-step prompt that outlines the customer's intent, emotions, and interaction style.
30-
- **Success Criteria:** List one or more questions that an LLM will use to evaluate if the call was successful.
30+
- **Script:** Define how the testing agent should behave, including a detailed multi-step prompt to simulate how the customer should behave on the call.
31+
- **Rubric:** List one or more questions that an LLM will use to evaluate if the call was successful.
3132
- **Attempts:** Choose the number of times (up to 5) the test case should be executed each time the Test Suite is run.
3233

3334
### Step 5: Run and Review Tests
3435
- Click **Run Tests** to execute all test cases one by one.
3536
- While tests are running, you will see a loading state.
3637
- Upon completion, a table displays the outcomes with check marks (success) or x-marks (failure).
37-
- Click on a test row to view detailed results: a dropdown shows each attempt, the LLM's reasoning, the transcript of the call, the defined caller behavior, and the success criteria.
38-
39-
### Step 6: Export Results
40-
- You can export the test results as a CSV file for further analysis.
38+
- Click on a test row to view detailed results: a dropdown shows each attempt, the LLM's reasoning, the transcript of the call, the defined script, and the success rubric.
4139
</Steps>
4240

4341
## Test Execution and Evaluation
@@ -51,74 +49,56 @@ When you run a Test Suite, the following steps occur:
5149
- The number of attempts made.
5250
- The LLM's reasoning for each attempt.
5351
- The complete call transcript.
54-
- The configured caller behavior and success criteria.
55-
- **CSV Export:** Results can be exported for additional review or compliance purposes.
52+
- The configured script and rubric.
5653

5754
## Example Test Cases
5855

59-
Below are three example test cases to illustrate how you can configure detailed caller behavior and success criteria.
60-
61-
### Example 1: Account Inquiry
56+
Below are three example test cases to illustrate how you can configure detailed simulation scripts and evaluation rubrics.
6257

63-
**Caller Behavior:**
64-
Simulate a customer inquiring about their account status with growing concern as unexplained charges appear in their statement.
58+
### Example 1: Billing Support
6559

66-
**Example Prompt:**
67-
```md title="Example Prompt" wordWrap
68-
[Identity]
69-
You are a long-time bank customer with a keen eye for your financial details.
60+
In this example, we will simulate a customer who is frustrated and calling about a billing discrepancy.
7061

71-
[Personality]
72-
Normally calm and polite, you become increasingly anxious when you notice discrepancies on your account statement. Your tone shifts from supportive to urgent as the conversation progresses.
73-
74-
[Goals]
75-
Your primary objective is to clarify several unexplained charges by requesting a detailed breakdown of your recent transactions and ensuring your account balance is accurate.
76-
77-
[Interaction Style]
78-
Begin the call by stating your name and expressing concern over unexpected charges. Ask straightforward questions and press for more details if the explanation is not satisfactory.
62+
**Script:**
63+
```md title="Script" wordWrap
64+
1. Express anger over an unexpected charge and the current bill appearing unusually high.
65+
2. Try to get a detailed explanation, confirming whether an overcharge occurred, and understanding the steps for resolution.
66+
3. End the call.
7967
```
8068

81-
**Success Criteria:**
82-
```md title="Success Criteria" wordWrap
83-
1. The voice agent clearly presents the current account balance.
84-
2. The voice agent provides a detailed breakdown of recent transactions.
85-
3. The response addresses the customer's concerns in a calm and informative manner.
69+
**Rubric:**
70+
```md title="Rubric" wordWrap
71+
The voice agent acknowledges the billing discrepancy respectfully without dismissing the concern.
8672
```
8773

88-
### Example 2: Billing Support
89-
90-
**Caller Behavior:**
91-
Simulate a customer who is frustrated and calling about a billing discrepancy.
74+
### Example 2: Account Inquiry
9275

93-
**Example Prompt:**
94-
```txt title="Example Prompt" wordWrap
95-
[Identity]
96-
You are a loyal customer who has always trusted the billing process, but the current bill appears unusually high.
76+
Unlike in the previous example, this time we will provide a more free-form script for the test agent to follow.
9777

98-
[Personality]
99-
Frustrated and assertive, you express anger over an unexpected charge while remaining focused on obtaining clarification.
78+
**Script:**
79+
```md title="Script" wordWrap
80+
Simulate a customer inquiring about their account status with growing concern as unexplained charges appear in their statement.
10081

101-
[Goals]
102-
Your goal is to understand the discrepancy in your bill by obtaining a detailed explanation, confirming whether an overcharge occurred, and understanding the steps for resolution.
82+
Your primary objective is to clarify several unexplained charges by requesting a detailed breakdown of your recent transactions and ensuring your account balance is accurate.
10383

104-
[Interaction Style]
105-
Start the call by clearly stating your billing concern, describing the specific overcharge, and requesting a comprehensive explanation with resolution options.
84+
Begin the call by stating your name and expressing concern over unexpected charges. Ask straightforward questions and press for more details if the explanation is not satisfactory.
10685
```
10786

108-
**Success Criteria:**
109-
```md title="Success Criteria" wordWrap
110-
1. The voice agent acknowledges the billing discrepancy respectfully without dismissing the concern.
111-
2. The agent provides a clear explanation of the charges, detailing possible reasons for the discrepancy.
112-
3. The conversation concludes with a proposed solution or a clear escalation plan to address the overcharge.
87+
**Rubric:**
88+
```md title="Rubric" wordWrap
89+
1. The voice agent clearly presents the current account balance.
90+
2. The voice agent provides a detailed breakdown of recent transactions.
91+
3. The response addresses the customer's concerns in a calm and informative manner.
11392
```
11493

11594
### Example 3: Appointment Scheduling
11695

117-
**Caller Behavior:**
96+
This time, we will spin up an even more detailed personality for the test agent. By showing these varied styles of scripts, we hope to show the flexibility of the Voice Testing feature and how you can use it to meet your testing needs.
97+
98+
**Script:**
99+
```md title="Script" wordWrap
118100
Simulate a customer trying to schedule an appointment with a hint of urgency due to previous delays.
119101

120-
**Example Prompt:**
121-
```md title="Example Prompt" wordWrap
122102
[Identity]
123103
You are an organized customer who values efficiency and punctuality.
124104

@@ -132,8 +112,8 @@ Your goal is to secure an appointment at your preferred time, while remaining fl
132112
Begin the call by stating your need for an appointment, specifying a preferred date and time (e.g., next Monday at 3 PM). Request clear confirmation of your slot, and if unavailable, ask for suitable alternatives.
133113
```
134114

135-
**Success Criteria:**
136-
```md title="Success Criteria" wordWrap
115+
**Rubric:**
116+
```md title="Rubric" wordWrap
137117
1. The voice agent confirms the requested appointment time clearly and accurately.
138118
2. The agent reiterates the appointment details to ensure clarity.
139119
3. The scheduling process ends with a definitive confirmation message of the booked appointment.

0 commit comments

Comments
 (0)