Skip to content

Commit 4fde64c

Browse files
Add translation and transcription endpoints
1 parent 3a83729 commit 4fde64c

19 files changed

+719
-38
lines changed

.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Package.resolved

Lines changed: 2 additions & 38 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
//
2+
// Audio.swift
3+
//
4+
// Copyright (c) 2024 Exyte
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in
14+
// all copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
// THE SOFTWARE.
23+
//
24+
25+
import Foundation
26+
27+
/// Requests that can be issued against the audio endpoints.
///
/// Each case carries the payload describing the request; the speech case
/// additionally carries a `destination` URL.
enum Audio {

    /// Transcription request described by `payload`.
    case createTranscriptionPayload(payload: CreateTranscriptionPayload)

    /// Translation request described by `payload`.
    case createTranslationPayload(payload: CreateTranslationPayload)

    /// Speech request described by `payload`, paired with a `destination` URL.
    case createSpeechPayload(payload: CreateSpeechPayload, destination: URL)

}
32+
33+
extension Audio: EndpointConfiguration {

    /// HTTP method of the request; every case of `Audio` uses `.post`.
    var method: HTTPRequestMethod {
        return .post
    }

    /// Endpoint path matching the current case.
    var path: String {
        switch self {
        case .createTranscriptionPayload:
            return "/audio/transcriptions"
        case .createTranslationPayload:
            return "/audio/translations"
        case .createSpeechPayload:
            return "/audio/speech"
        }
    }

    /// Request task for the current case.
    ///
    /// Transcription and translation produce `.uploadMultipart` with a form that
    /// always contains `file`, `model` and `response_format`; optional payload
    /// fields are appended only when they are set. Speech produces
    /// `.download(destination)`.
    var task: RequestTask {
        switch self {
        case .createTranscriptionPayload(let payload):
            var parts = Self.requiredParts(
                file: payload.file,
                model: payload.model.rawValue,
                responseFormat: payload.responseFormat?.rawValue ?? TextResponseFormat.json.rawValue
            )
            if let temperature = payload.temperature {
                parts.append(
                    FormBodyPart(
                        name: "temperature",
                        value: .floatingPoint(Float(temperature))
                    )
                )
            }
            if let prompt = payload.prompt {
                parts.append(
                    FormBodyPart(
                        name: "prompt",
                        value: .plainText(prompt)
                    )
                )
            }
            if let language = payload.language {
                parts.append(
                    FormBodyPart(
                        name: "language",
                        value: .plainText(language)
                    )
                )
            }
            // Granularities are only attached when the verbose JSON format was requested.
            if let timestampGranularities = payload.timestampGranularities,
               payload.responseFormat == .verboseJson {
                // NOTE(review): `withUnsafeBytes(of:)` copies the in-memory representation
                // of the value, not a textual encoding of its contents. If this is a
                // collection, the bytes are presumably just its storage reference —
                // confirm the element type and encode the values explicitly.
                let timestampGranularitiesData = withUnsafeBytes(of: timestampGranularities) { Data($0) }
                parts.append(
                    FormBodyPart(
                        name: "timestamp_granularities",
                        value: .data(timestampGranularitiesData)
                    )
                )
            }
            return .uploadMultipart(parts)

        case .createTranslationPayload(let payload):
            var parts = Self.requiredParts(
                file: payload.file,
                model: payload.model.rawValue,
                responseFormat: payload.responseFormat?.rawValue ?? TextResponseFormat.json.rawValue
            )
            if let prompt = payload.prompt {
                parts.append(
                    FormBodyPart(
                        name: "prompt",
                        value: .plainText(prompt)
                    )
                )
            }
            if let temperature = payload.temperature {
                parts.append(
                    FormBodyPart(
                        name: "temperature",
                        value: .floatingPoint(Float(temperature))
                    )
                )
            }
            return .uploadMultipart(parts)

        // The payload was bound but never read here; it is ignored explicitly to
        // avoid an unused-variable warning.
        // NOTE(review): the speech payload is not part of the returned task —
        // confirm how it is meant to reach the server.
        case .createSpeechPayload(_, let destination):
            return .download(destination)
        }
    }

    /// Builds the form parts shared by the transcription and translation requests:
    /// `file`, `model` and `response_format`, in that order.
    private static func requiredParts(
        file: URL,
        model: String,
        responseFormat: String
    ) -> [FormBodyPart] {
        return [
            FormBodyPart(
                name: "file",
                value: .fileURL(file),
                fileName: file.lastPathComponent,
                // NOTE(review): this passes the bare path extension (e.g. "mp3") as the
                // MIME type — confirm `FormBodyPart` expects that rather than a full
                // media type.
                mimeType: file.pathExtension
            ),
            FormBodyPart(
                name: "model",
                value: .plainText(model)
            ),
            FormBodyPart(
                name: "response_format",
                value: .plainText(responseFormat)
            )
        ]
    }

}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
//
2+
// Transcription.swift
3+
//
4+
// Copyright (c) 2024 Exyte
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in
14+
// all copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
// THE SOFTWARE.
23+
//
24+
25+
import Foundation
26+
27+
/// Decodable result of a transcription request.
///
/// Only `text` is required; the remaining fields are optional and are `nil`
/// when absent from the decoded data.
public struct Transcription: Codable {

    // Stored properties are `public` so that clients outside the module can read
    // the values of this public type.

    /// Transcribed text.
    public let text: String
    /// Language value, if present. Presumably the language of the audio — confirm against the API.
    public let language: String?
    /// Duration value, if present. Units are not established here — confirm against the API.
    public let duration: Double?
    /// Words value, if present.
    /// NOTE(review): typed as a single `String`; confirm this matches the shape
    /// the service actually returns for word-level output.
    public let words: String?
    /// Segments of the transcription, if present.
    public let segments: [TranscriptionSegment]?

    /// Creates a transcription value.
    /// - Parameters:
    ///   - text: Transcribed text.
    ///   - language: Optional language value; defaults to `nil`.
    ///   - duration: Optional duration value; defaults to `nil`.
    ///   - words: Optional words value; defaults to `nil`.
    ///   - segments: Optional list of segments; defaults to `nil`.
    public init(
        text: String,
        language: String? = nil,
        duration: Double? = nil,
        words: String? = nil,
        segments: [TranscriptionSegment]? = nil
    ) {
        self.text = text
        self.language = language
        self.duration = duration
        self.words = words
        self.segments = segments
    }

}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//
2+
// TranscriptionSegment.swift
3+
//
4+
// Copyright (c) 2024 Exyte
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in
14+
// all copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
// THE SOFTWARE.
23+
//
24+
25+
import Foundation
26+
27+
/// One segment of a transcription.
///
/// `avgLogprob`, `compressionRatio` and `noSpeechProb` are optional; every
/// other field is required when decoding.
public struct TranscriptionSegment: Codable {

    // Stored properties are `public` so that clients outside the module can read
    // the values of this public type.

    /// Identifier of the segment.
    public let id: Int
    /// Seek value of the segment.
    public let seek: Int
    /// Start of the segment. Presumably seconds — confirm against the API.
    public let start: Double
    /// End of the segment. Presumably seconds — confirm against the API.
    public let end: Double
    /// Text of the segment.
    public let text: String
    /// Token values of the segment.
    public let tokens: [Int]
    /// Temperature value of the segment.
    public let temperature: Double
    /// Average log-probability value, if present.
    public let avgLogprob: Double?
    /// Compression ratio value, if present.
    public let compressionRatio: Double?
    /// No-speech probability value, if present.
    public let noSpeechProb: Double?

    /// Creates a segment.
    ///
    /// Labels and order match the memberwise initializer this replaces, so
    /// existing call sites keep compiling; the optional fields default to `nil`.
    public init(
        id: Int,
        seek: Int,
        start: Double,
        end: Double,
        text: String,
        tokens: [Int],
        temperature: Double,
        avgLogprob: Double? = nil,
        compressionRatio: Double? = nil,
        noSpeechProb: Double? = nil
    ) {
        self.id = id
        self.seek = seek
        self.start = start
        self.end = end
        self.text = text
        self.tokens = tokens
        self.temperature = temperature
        self.avgLogprob = avgLogprob
        self.compressionRatio = compressionRatio
        self.noSpeechProb = noSpeechProb
    }

}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//
2+
// Translation.swift
3+
//
4+
// Copyright (c) 2024 Exyte
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in
14+
// all copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
// THE SOFTWARE.
23+
//
24+
25+
import Foundation
26+
27+
/// Decodable result of a translation request.
public struct Translation: Codable {

    /// Translated text. `public` so that clients outside the module can read it.
    public let text: String

    /// Creates a translation value.
    /// - Parameter text: Translated text.
    public init(text: String) {
        self.text = text
    }

}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//
2+
// AudioResponseFormat.swift
3+
//
4+
// Copyright (c) 2024 Exyte
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in
14+
// all copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
// THE SOFTWARE.
23+
//
24+
25+
import Foundation
26+
27+
/// String-backed set of audio format identifiers.
public enum AudioResponseFormat: String, Codable {

    // Raw values are implicit: each one equals its case name.
    case mp3, opus, aac, flac, wav, pcm

}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//
2+
// STTModel.swift
3+
//
4+
// Copyright (c) 2024 Exyte
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in
14+
// all copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
// THE SOFTWARE.
23+
//
24+
25+
import Foundation
26+
27+
/// String-backed identifiers of the available speech-to-text models.
public enum STTModel: String, Codable {

    /// Model whose identifier is `"whisper-1"`.
    case whisper1 = "whisper-1"

}

0 commit comments

Comments
 (0)