Skip to content

Commit 465392f

Browse files
committed
feat: Add Groq AI for audio transcription
1 parent b40faaa commit 465392f

File tree

4 files changed

+94
-10
lines changed

4 files changed

+94
-10
lines changed

app/src/main/java/com/wmods/wppenhacer/xposed/features/others/AudioTranscript.java

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import com.wmods.wppenhacer.xposed.core.components.FMessageWpp;
99
import com.wmods.wppenhacer.xposed.core.devkit.Unobfuscator;
1010
import com.wmods.wppenhacer.xposed.utils.ReflectionUtils;
11+
import com.wmods.wppenhacer.xposed.utils.ResId;
12+
import com.wmods.wppenhacer.xposed.utils.Utils;
1113

1214
import org.json.JSONObject;
1315

@@ -32,7 +34,19 @@ public AudioTranscript(@NonNull ClassLoader classLoader, @NonNull XSharedPrefere
3234
@Override
3335
public void doHook() throws Throwable {
3436

35-
if (!prefs.getBoolean("assemblyai", false) || TextUtils.isEmpty(prefs.getString("assemblyai_key", "")))
37+
if (!prefs.getBoolean("audio_transcription", false))
38+
return;
39+
40+
String provider = prefs.getString("transcription_provider", "assemblyai");
41+
String apiKey = "";
42+
43+
if ("groq".equals(provider)) {
44+
apiKey = prefs.getString("groq_api_key", "");
45+
} else {
46+
apiKey = prefs.getString("assemblyai_key", "");
47+
}
48+
49+
if (TextUtils.isEmpty(apiKey))
3650
return;
3751

3852
var transcribeMethod = Unobfuscator.loadTranscribeMethod(classLoader);
@@ -46,11 +60,23 @@ protected void beforeHookedMethod(MethodHookParam param) throws Throwable {
4660
var fmessageObj = fieldFMessage.get(pttTranscriptionRequest);
4761
var fmessage = new FMessageWpp(fmessageObj);
4862
File file = fmessage.getMediaFile();
63+
if (file == null) {
64+
Utils.showToast(Utils.getApplication().getString(ResId.string.download_not_available), 1);
65+
return;
66+
}
4967
var callback = param.args[1];
5068
var onComplete = ReflectionUtils.findMethodUsingFilter(callback.getClass(), method -> method.getParameterCount() == 4);
5169
if (file == null || !file.exists())
5270
return;
53-
String transcript = runTranscript(file);
71+
72+
// Choose transcription provider based on user preference
73+
String transcript;
74+
if ("groq".equals(provider)) {
75+
transcript = transcriptionGroqAI(file);
76+
} else {
77+
transcript = transcriptionAssemblyAI(file);
78+
}
79+
5480
var segments = new ArrayList<>();
5581
var words = transcript.split("\\s");
5682
var totalLength = 0;
@@ -65,7 +91,7 @@ protected void beforeHookedMethod(MethodHookParam param) throws Throwable {
6591

6692
}
6793

68-
private String runTranscript(File fileOpus) throws Exception {
94+
private String transcriptionAssemblyAI(File fileOpus) throws Exception {
6995
String apiKey = prefs.getString("assemblyai_key", "");
7096
if (TextUtils.isEmpty(apiKey)) {
7197
return "API key not provided";
@@ -139,6 +165,41 @@ private String runTranscript(File fileOpus) throws Exception {
139165
}
140166
}
141167

168+
private String transcriptionGroqAI(File fileAudio) throws Exception {
169+
String apiKey = prefs.getString("groq_api_key", "");
170+
if (TextUtils.isEmpty(apiKey)) {
171+
return "Groq API key not provided";
172+
}
173+
174+
OkHttpClient client = new OkHttpClient();
175+
176+
// Groq API accepts direct file upload with multipart/form-data
177+
RequestBody requestBody = new okhttp3.MultipartBody.Builder()
178+
.setType(okhttp3.MultipartBody.FORM)
179+
.addFormDataPart("file", fileAudio.getName(),
180+
RequestBody.create(fileAudio, MediaType.parse("audio/ogg")))
181+
.addFormDataPart("model", "whisper-large-v3-turbo")
182+
.addFormDataPart("response_format", "json")
183+
.addFormDataPart("temperature", "0")
184+
.build();
185+
186+
Request transcribeRequest = new Request.Builder()
187+
.url("https://api.groq.com/openai/v1/audio/transcriptions")
188+
.addHeader("Authorization", "Bearer " + apiKey)
189+
.post(requestBody)
190+
.build();
191+
192+
try (okhttp3.Response response = client.newCall(transcribeRequest).execute()) {
193+
if (!response.isSuccessful()) {
194+
return "Failed to transcribe audio: " + response.code() + " - " + response.message();
195+
}
196+
197+
JSONObject result = new JSONObject(response.body().string());
198+
return result.getString("text");
199+
}
200+
}
201+
202+
142203
@NonNull
143204
@Override
144205
public String getPluginName() {

app/src/main/res/values/arrays.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
<item>2.25.30.xx</item>
128128
<item>2.25.31.xx</item>
129129
<item>2.25.32.xx</item>
130+
<item>2.25.33.xx</item>
130131
</string-array>
131132
<string-array name="supported_versions_business">
132133
<item>2.25.25.xx</item>
@@ -137,6 +138,7 @@
137138
<item>2.25.30.xx</item>
138139
<item>2.25.31.xx</item>
139140
<item>2.25.32.xx</item>
141+
<item>2.25.33.xx</item>
140142
</string-array>
141143
<string-array name="image_picker">
142144
<item>image/*</item>
@@ -151,6 +153,14 @@
151153
<item>@string/audio_note</item>
152154
<item>@string/voice_note</item>
153155
</string-array>
156+
<string-array name="transcription_provider_entries">
157+
<item>AssemblyAI</item>
158+
<item>Groq AI (Whisper v3 Turbo)</item>
159+
</string-array>
160+
<string-array name="transcription_provider_values">
161+
<item>assemblyai</item>
162+
<item>groq</item>
163+
</string-array>
154164
<string-array name="animations_values">
155165
<item>default</item>
156166
<item>fade_in</item>

app/src/main/res/values/strings.xml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,10 +373,14 @@
373373
<string name="custom_privacy_sum">Enable/Disable Custom Privacy</string>
374374
<string name="custom_privacy_per_contact">Custom Privacy per contact</string>
375375
<string name="custom_privacy_per_contact_sum">Activate the custom privacy button on the contact information screen</string>
376+
<string name="transcription_provider">Transcription Provider</string>
377+
<string name="transcription_provider_sum">Choose the AI service for audio transcription</string>
376378
<string name="assemblyai">Enable AssemblyAI</string>
377379
<string name="assemblyai_sum">Replaces the default Whatsapp transcription with AssemblyAI</string>
378-
<string name="assemblyai_key">AssemblyAI key</string>
379-
<string name="assemblyai_key_sum">AssemblyAI API access key (Login to the website and create an account to get your key)</string>
380+
<string name="assemblyai_key">AssemblyAI API Key</string>
381+
<string name="assemblyai_key_sum">AssemblyAI API access key (Get it at assemblyai.com)</string>
382+
<string name="groq_api_key">Groq API Key</string>
383+
<string name="groq_api_key_sum">Groq API access key (Get it at console.groq.com)</string>
380384
<string name="google_translate">Enable Google Translate</string>
381385
<string name="google_translate_sum">Replaces Whatsapp\'s native translator with Google Translate</string>
382386
<string name="show_contact_info">Show in contact info</string>

app/src/main/res/xml/fragment_media.xml

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,18 +100,27 @@
100100
app:summary="@string/audio_transcription_sum"
101101
app:title="@string/audio_transcription" />
102102

103-
<rikka.material.preference.MaterialSwitchPreference
103+
<ListPreference
104104
android:dependency="audio_transcription"
105-
app:key="assemblyai"
106-
app:summary="@string/assemblyai_sum"
107-
app:title="@string/assemblyai" />
105+
android:defaultValue="assemblyai"
106+
android:entries="@array/transcription_provider_entries"
107+
android:entryValues="@array/transcription_provider_values"
108+
app:key="transcription_provider"
109+
app:summary="@string/transcription_provider_sum"
110+
app:title="@string/transcription_provider" />
108111

109112
<EditTextPreference
110-
android:dependency="assemblyai"
113+
android:dependency="audio_transcription"
111114
app:key="assemblyai_key"
112115
app:summary="@string/assemblyai_key_sum"
113116
app:title="@string/assemblyai_key" />
114117

118+
<EditTextPreference
119+
android:dependency="audio_transcription"
120+
app:key="groq_api_key"
121+
app:summary="@string/groq_api_key_sum"
122+
app:title="@string/groq_api_key" />
123+
115124
</PreferenceCategory>
116125

117126
<PreferenceCategory

0 commit comments

Comments
 (0)