Skip to content

Commit cc230ec

Browse files
authored
[Batch Avatar] Add sample of how to enable voice sync for avatar in Batch Synthesis (#2946)
* Update batch avatar sample (C#) to include built-in voice * Update python sample * Merge * Add photo avatar sample in batch C# * Add more samples
1 parent b2f21a8 commit cc230ec

File tree

5 files changed

+108
-32
lines changed

5 files changed

+108
-32
lines changed

samples/batch-avatar/csharp/BatchAvatarSample/BatchAvatarSample.sln

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,20 @@ MinimumVisualStudioVersion = 10.0.40219.1
55
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BatchAvatarSample", "BatchAvatarSample\BatchAvatarSample.csproj", "{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}"
66
EndProject
77
Global
8-
GlobalSection(SolutionConfigurationPlatforms) = preSolution
9-
Debug|Any CPU = Debug|Any CPU
10-
Release|Any CPU = Release|Any CPU
11-
EndGlobalSection
12-
GlobalSection(ProjectConfigurationPlatforms) = postSolution
13-
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
14-
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Debug|Any CPU.Build.0 = Debug|Any CPU
15-
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Release|Any CPU.ActiveCfg = Release|Any CPU
16-
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Release|Any CPU.Build.0 = Release|Any CPU
17-
EndGlobalSection
18-
GlobalSection(SolutionProperties) = preSolution
19-
HideSolutionNode = FALSE
20-
EndGlobalSection
21-
GlobalSection(ExtensibilityGlobals) = postSolution
22-
SolutionGuid = {B0A9639E-340F-46FF-B5AE-92569CFF2A72}
23-
EndGlobalSection
8+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
9+
Debug|Any CPU = Debug|Any CPU
10+
Release|Any CPU = Release|Any CPU
11+
EndGlobalSection
12+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
13+
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
14+
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Debug|Any CPU.Build.0 = Debug|Any CPU
15+
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Release|Any CPU.ActiveCfg = Release|Any CPU
16+
{CF5ED996-4313-480A-8A3D-0BB8C3D0B73A}.Release|Any CPU.Build.0 = Release|Any CPU
17+
EndGlobalSection
18+
GlobalSection(SolutionProperties) = preSolution
19+
HideSolutionNode = FALSE
20+
EndGlobalSection
21+
GlobalSection(ExtensibilityGlobals) = postSolution
22+
SolutionGuid = {B0A9639E-340F-46FF-B5AE-92569CFF2A72}
23+
EndGlobalSection
2424
EndGlobal

samples/batch-avatar/csharp/BatchAvatarSample/BatchAvatarSample/Program.cs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ private static async Task Main(string[] args)
7373
Console.WriteLine("Summary file can be downloaded from:");
7474
Console.WriteLine(job.Outputs!.Summary);
7575

76+
Console.WriteLine("Press Enter to delete the job and exit.");
77+
var input = Console.ReadLine();
78+
7679
//
7780
// Delete a job
7881
//
@@ -97,8 +100,7 @@ private static async Task Main(string[] args)
97100

98101
private static async Task<BatchAvatarJob> CreateBatchAvatarJob(HttpClient httpClient, string jobUri)
99102
{
100-
// To use SSML as input, please refer to RequestExamples.SsmlRequest
101-
// To use your custom neural voice, please refer to RequestExamples.CustomVoiceRequest
103+
// Please refer to RequestExamples.cs for more request examples.
102104
var requestBody = new BatchAvatarRequest
103105
{
104106
InputKind = "PlainText",
@@ -117,11 +119,12 @@ private static async Task<BatchAvatarJob> CreateBatchAvatarJob(HttpClient httpCl
117119
{
118120
TalkingAvatarCharacter = "lisa", // Avatar character
119121
TalkingAvatarStyle = "graceful-sitting", // Avatar style, required for prebuilt avatar, optional for custom avatar
122+
Customized = false, // Set to true if you want to use custom avatar
120123
VideoFormat = "mp4", // mp4 or webm, webm is required for transparent background
121124
VideoCodec = "h264", // hevc, h264 or vp9, vp9 is required for transparent background; default is hevc
122125
SubtitleType = "soft_embedded",
123126
BackgroundColor = "#FFFFFFFF", // background color in RGBA format, default is white; can be set to 'transparent' for transparent background
124-
Customized = false, // Set to true if you want to use custom avatar
127+
UseBuiltInVoice = false, // Set to true to use voice sync with a custom avatar (only applies when Customized is true)
125128
},
126129
};
127130

samples/batch-avatar/csharp/BatchAvatarSample/BatchAvatarSample/RequestExamples.cs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ internal static class RequestExamples
4949
// Invalid voice name or deployment ID will be rejected.
5050
CustomVoices = new Dictionary<string, Guid>
5151
{
52-
["YOUR_CUSTOM_VOICE_NAME"] = Guid.Parse("YOUR_CUSTOM_VOICE_DEPLOYMENT_ID"),
52+
["YOUR_CUSTOM_VOICE_NAME"] = Guid.Parse("12345678-0000-0000-0000-000000000000"),
5353
},
5454
SynthesisConfig = new()
5555
{
@@ -65,4 +65,71 @@ internal static class RequestExamples
6565
BackgroundColor = "#FFFFFFFF",
6666
},
6767
};
68+
69+
public static BatchAvatarRequest VoiceSyncForAvatar = new()
70+
{
71+
InputKind = "PlainText",
72+
Inputs =
73+
[
74+
new BatchAvatarInput
75+
{
76+
Content = "Hi, I'm a virtual assistant created by Microsoft.",
77+
},
78+
],
79+
AvatarConfig = new()
80+
{
81+
TalkingAvatarCharacter = "my-custom-avatar",
82+
Customized = true,
83+
UseBuiltInVoice = true,
84+
VideoFormat = "mp4",
85+
VideoCodec = "h264",
86+
},
87+
};
88+
89+
public static BatchAvatarRequest PublicPhotoAvatar = new()
90+
{
91+
InputKind = "PlainText",
92+
Inputs =
93+
[
94+
new BatchAvatarInput
95+
{
96+
Content = "Hi, I'm a virtual assistant created by Microsoft.",
97+
},
98+
],
99+
SynthesisConfig = new()
100+
{
101+
Voice = "en-US-AndrewNeural",
102+
},
103+
AvatarConfig = new()
104+
{
105+
TalkingAvatarCharacter = "Matteo",
106+
PhotoAvatarBaseModel = "vasa-1",
107+
VideoFormat = "mp4",
108+
VideoCodec = "h264",
109+
},
110+
};
111+
112+
public static BatchAvatarRequest CustomPhotoAvatar = new()
113+
{
114+
InputKind = "PlainText",
115+
Inputs =
116+
[
117+
new BatchAvatarInput
118+
{
119+
Content = "Hi, I'm a virtual assistant created by Microsoft.",
120+
},
121+
],
122+
SynthesisConfig = new()
123+
{
124+
Voice = "en-US-AndrewNeural",
125+
},
126+
AvatarConfig = new()
127+
{
128+
TalkingAvatarCharacter = "my-photo-avatar",
129+
Customized = true,
130+
PhotoAvatarBaseModel = "vasa-1",
131+
VideoFormat = "mp4",
132+
VideoCodec = "h264",
133+
},
134+
};
68135
}

samples/batch-avatar/csharp/BatchAvatarSample/BatchAvatarSample/dto/BatchAvatarJob.cs

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public class BatchAvatarRequest
2121

2222
public BatchSynthesisProperties? Properties { get; set; }
2323

24-
public required BatchAvatarconfig AvatarConfig { get; set; }
24+
public required BatchAvatarConfig AvatarConfig { get; set; }
2525
}
2626

2727
public class BatchAvatarJob
@@ -42,7 +42,7 @@ public class BatchAvatarJob
4242

4343
public BatchSynthesisProperties? Properties { get; set; }
4444

45-
public required BatchAvatarconfig AvatarConfig { get; set; }
45+
public required BatchAvatarConfig AvatarConfig { get; set; }
4646

4747
public BatchSynthesisOutputs? Outputs { get; set; }
4848
}
@@ -62,23 +62,23 @@ public class BatchSynthesisProperties
6262

6363
public string? DestinationPath { get; set; }
6464

65-
public int? sizeInBytes { get; set; }
65+
public int? SizeInBytes { get; set; }
6666

67-
public int? succeededCount { get; set; }
67+
public int? SucceededCount { get; set; }
6868

69-
public int? failedCount { get; set; }
69+
public int? FailedCount { get; set; }
7070

71-
public int? durationInMilliseconds { get; set; }
71+
public int? DurationInMilliseconds { get; set; }
7272

73-
public Billingdetails? billingDetails { get; set; }
73+
public Billingdetails? BillingDetails { get; set; }
7474

7575
public Error? Error { get; set; }
7676
}
7777

7878
public class Billingdetails
7979
{
80-
public int neuralCharacters { get; set; }
81-
public int talkingAvatarDurationSeconds { get; set; }
80+
public int NeuralCharacters { get; set; }
81+
public int TalkingAvatarDurationSeconds { get; set; }
8282
}
8383

8484
public class BatchSynthesisconfig
@@ -94,7 +94,7 @@ public class BatchSynthesisconfig
9494
public string? Volume { get; set; }
9595
}
9696

97-
public class BatchAvatarconfig
97+
public class BatchAvatarConfig
9898
{
9999
public required string TalkingAvatarCharacter { get; set; }
100100

@@ -119,6 +119,10 @@ public class BatchAvatarconfig
119119
public int? BitrateKbps { get; set; }
120120

121121
public bool? Customized { get; set; }
122+
123+
public bool? UseBuiltInVoice { get; set; }
124+
125+
public string? PhotoAvatarBaseModel { get; set; }
122126
}
123127

124128
public class Videocrop
@@ -143,7 +147,7 @@ public class BillingDetails
143147

144148
public long CustomNeuralCharacters { get; set; }
145149

146-
public long talkingAvatarDurationSeconds { get; set; }
150+
public long TalkingAvatarDurationSeconds { get; set; }
147151

148152
public long AoaiCharacters { get; set; }
149153

samples/batch-avatar/python/synthesis.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,11 @@ def submit_synthesis(job_id: str):
8989
"videoCodec": "h264", # hevc, h264 or vp9, vp9 is required for transparent background; default is hevc
9090
"subtitleType": "soft_embedded",
9191
"backgroundColor": "#FFFFFFFF", # background color in RGBA format, default is white;
92-
# can be set to 'transparent' for transparent background
92+
# can be set to 'transparent' for transparent background
9393
# "backgroundImage": "https://samples-files.com/samples/Images/jpg/1920-1080-sample.jpg",
9494
# background image URL (HTTPS only); set either backgroundImage or backgroundColor
95+
"useBuiltInVoice": False, # whether to use voice sync of custom avatar.
96+
# Only valid if customized is True and the custom avatar is trained with voice sync enabled.
9597
}
9698
}
9799

0 commit comments

Comments
 (0)