Skip to content

Commit 947e08a

Browse files
committed
feat: Add pitch option
1 parent 2b7e081 commit 947e08a

File tree

4 files changed

+38
-12
lines changed

4 files changed

+38
-12
lines changed

internal/communicate/communicate.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import (
2828

2929
const (
3030
ssmlHeaderTemplate = "X-RequestId:%s\r\nContent-Type:application/ssml+xml\r\nX-Timestamp:%sZ\r\nPath:ssml\r\n\r\n"
31-
ssmlTemplate = "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='%s'><prosody pitch='+0Hz' rate='%s' volume='%s'>%s</prosody></voice></speak>"
31+
ssmlTemplate = "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='%s'><prosody pitch='%s' rate='%s' volume='%s'>%s</prosody></voice></speak>"
3232
)
3333

3434
var (
@@ -46,26 +46,27 @@ func init() {
4646
type Communicate struct {
4747
text string
4848
voice string
49-
voiceLanguageRegion string
49+
pitch string
5050
rate string
5151
volume string
52+
voiceLanguageRegion string
5253

5354
httpProxy string
5455
socket5Proxy string
5556
socket5ProxyUser string
5657
socket5ProxyPass string
57-
op chan map[string]interface{}
5858

5959
audioDataIndex int
6060
prevIdx int
6161
shiftTime int
6262
finalUtterance map[int]int
63+
op chan map[string]interface{}
6364
}
6465

6566
type textEntry struct {
6667
Text string `json:"text"`
67-
Length int64 `json:"Length"`
6868
BoundaryType string `json:"BoundaryType"`
69+
Length int64 `json:"Length"`
6970
}
7071
type dataEntry struct {
7172
Offset int `json:"Offset"`
@@ -112,6 +113,7 @@ func NewCommunicate(text string, opt *communicateOption.CommunicateOption) (*Com
112113
}
113114
return &Communicate{
114115
text: text,
116+
pitch: opt.Pitch,
115117
voice: opt.Voice,
116118
voiceLanguageRegion: opt.VoiceLangRegion,
117119
rate: opt.Rate,
@@ -171,7 +173,7 @@ func makeHeaders() http.Header {
171173
func (c *Communicate) stream() (<-chan map[string]interface{}, error) {
172174
texts := splitTextByByteLength(
173175
escape(removeIncompatibleCharacters(c.text)),
174-
calculateMaxMessageSize(c.voice, c.rate, c.volume),
176+
calculateMaxMessageSize(c.pitch, c.voice, c.rate, c.volume),
175177
)
176178
c.audioDataIndex = len(texts)
177179

@@ -234,7 +236,7 @@ func (c *Communicate) sendSSML(conn *websocket.Conn, currentTime string, text []
234236
ssmlHeadersAppendExtraData(
235237
generateConnectID(),
236238
currentTime,
237-
makeSsml(string(text), c.voice, c.rate, c.volume),
239+
makeSsml(string(text), c.pitch, c.voice, c.rate, c.volume),
238240
),
239241
))
240242
}
@@ -474,10 +476,11 @@ func splitTextByByteLength(text string, byteLength int) [][]byte {
474476
return result
475477
}
476478

477-
func makeSsml(text string, voice string, rate string, volume string) string {
479+
func makeSsml(text string, pitch, voice string, rate string, volume string) string {
478480
ssml := fmt.Sprintf(
479481
ssmlTemplate,
480482
voice,
483+
pitch,
481484
rate,
482485
volume,
483486
text)
@@ -500,9 +503,9 @@ func ssmlHeadersAppendExtraData(requestID string, timestamp string, ssml string)
500503
return headers + ssml
501504
}
502505

503-
func calculateMaxMessageSize(voice string, rate string, volume string) int {
506+
func calculateMaxMessageSize(pitch, voice string, rate string, volume string) int {
504507
websocketMaxSize := 1 << 16
505-
overheadPerMessage := len(ssmlHeadersAppendExtraData(generateConnectID(), currentTimeInMST(), makeSsml("", voice, rate, volume))) + 50
508+
overheadPerMessage := len(ssmlHeadersAppendExtraData(generateConnectID(), currentTimeInMST(), makeSsml("", pitch, voice, rate, volume))) + 50
506509
return websocketMaxSize - overheadPerMessage
507510
}
508511

internal/communicateOption/option.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@ package communicateOption
22

33
import (
44
"fmt"
5-
"github.com/lib-x/edgetts/internal/businessConsts"
65
"strings"
6+
7+
"github.com/lib-x/edgetts/internal/businessConsts"
78
)
89

910
type CommunicateOption struct {
1011
Voice string
1112
VoiceLangRegion string
13+
Pitch string
1214
Rate string
1315
Volume string
1416
HttpProxy string
@@ -31,6 +33,9 @@ func (c *CommunicateOption) CheckAndApplyDefaultOption() {
3133
name := voiceParsed[2]
3234
c.VoiceLangRegion = fmt.Sprintf(businessConsts.VoiceNameTemplate, lang, region, name)
3335
}
36+
if c.Pitch == "" {
37+
c.Pitch = "+0Hz"
38+
}
3439
if c.Rate == "" {
3540
c.Rate = "+0%"
3641
}

internal/validate/validate.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,20 @@ package validate
22

33
import (
44
"errors"
5-
"github.com/lib-x/edgetts/internal/communicateOption"
65
"regexp"
6+
7+
"github.com/lib-x/edgetts/internal/communicateOption"
78
)
89

910
var (
11+
validPitchPattern = regexp.MustCompile(`^[+-]\d+Hz$`)
1012
validVoicePattern = regexp.MustCompile(`^([a-z]{2,})-([A-Z]{2,})-(.+Neural)$`)
1113
validRateVolumePattern = regexp.MustCompile(`^[+-]\d+%$`)
1214
)
1315

1416
var (
1517
InvalidVoiceError = errors.New("invalid voice")
18+
InvalidPitchError = errors.New("invalid pitch")
1619
InvalidRateError = errors.New("invalid rate")
1720
InvalidVolumeError = errors.New("invalid volume")
1821
)
@@ -24,8 +27,11 @@ func WithCommunicateOption(c *communicateOption.CommunicateOption) error {
2427
return InvalidVoiceError
2528
}
2629

30+
// Check that the pitch matches the expected format (a sign, digits, then "Hz", e.g. +0Hz).
31+
if !validPitchPattern.MatchString(c.Pitch) {
32+
return InvalidPitchError
33+
}
2734
// Check that the rate matches the expected format (a sign, digits, then "%", e.g. +0%).
28-
2935
if !validRateVolumePattern.MatchString(c.Rate) {
3036
return InvalidRateError
3137
}
@@ -34,5 +40,6 @@ func WithCommunicateOption(c *communicateOption.CommunicateOption) error {
3440
if !validRateVolumePattern.MatchString(c.Volume) {
3541
return InvalidVolumeError
3642
}
43+
3744
return nil
3845
}

option.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import "github.com/lib-x/edgetts/internal/communicateOption"
55
type option struct {
66
Voice string
77
VoiceLangRegion string
8+
Pitch string
89
Rate string
910
Volume string
1011
HttpProxy string
@@ -17,6 +18,7 @@ func (o *option) toInternalOption() *communicateOption.CommunicateOption {
1718
return &communicateOption.CommunicateOption{
1819
Voice: o.Voice,
1920
VoiceLangRegion: o.VoiceLangRegion,
21+
Pitch: o.Pitch,
2022
Rate: o.Rate,
2123
Volume: o.Volume,
2224
HttpProxy: o.HttpProxy,
@@ -41,12 +43,21 @@ func WithVoiceLangRegion(voiceLangRegion string) Option {
4143

4244
}
4345

46+
// WithPitch sets the pitch of the TTS output, e.g. +50Hz or -50Hz.
47+
func WithPitch(pitch string) Option {
48+
return func(option *option) {
49+
option.Pitch = pitch
50+
}
51+
}
52+
53+
// WithRate sets the rate of the TTS output: rate=-50% lowers the rate by 50%, rate=+50% raises it by 50%.
4454
func WithRate(rate string) Option {
4555
return func(option *option) {
4656
option.Rate = rate
4757
}
4858
}
4959

60+
// WithVolume sets the volume of the TTS output: volume=-50% lowers the volume by 50%, volume=+50% raises it by 50%.
5061
func WithVolume(volume string) Option {
5162
return func(option *option) {
5263
option.Volume = volume

0 commit comments

Comments
 (0)