Skip to content

Commit 0b43a02

Browse files
authored
bindings.java : enable copyLibs task [no ci] (#2949)
* bindings.java : enable copyLibs task [no ci] This commit adds a dependency on the copyLibs task to the sourcesJar and jar tasks. This ensures that the libwhisper.so file is copied to the correct location before the jar is built. It also sets the executable bit on the gradlew file. * bindings.java : add copyLibs dep for processResources [no ci] This will otherwise cause builds to fail after doing an initial build. * bindings.java : pass structs by value to native code This commit refactors the code to pass the structs by value to the native code. This is done by creating a ByValue class for each struct and using it in the Java code. The motivation for this change is that without this the application crashes due to what I believe was memory misalignment. When the structs were passed to the native code they would be at different memory locations. Passing by value overcomes this issue, and considering that the structs hold parameters (context and full params) it might be alright to do this. These changes allow all the tests to pass. * bindings.java : fix javadoc warnings [no ci] * bindings.java : fix libwhisper.dylib path in build.gradle [no ci] This commit fixes the copyLibwhisperDynlib task in the build.gradle file to copy the correct libwhisper.dylib file from build/src.
1 parent 2699e14 commit 0b43a02

File tree

12 files changed

+275
-68
lines changed

12 files changed

+275
-68
lines changed

bindings/java/build.gradle

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ sourceSets {
2525
}
2626

2727
tasks.register('copyLibwhisperDynlib', Copy) {
28-
from '../../build'
29-
include 'libwhisper.dynlib'
28+
from '../../build/src'
29+
include 'libwhisper.dylib'
3030
into 'build/generated/resources/main/darwin'
3131
}
3232

3333
tasks.register('copyLibwhisperSo', Copy) {
34-
from '../../build'
34+
from '../../build/src'
3535
include 'libwhisper.so'
3636
into 'build/generated/resources/main/linux-x86-64'
3737
}
@@ -55,7 +55,12 @@ java {
5555
withJavadocJar()
5656
}
5757

58+
sourcesJar() {
59+
dependsOn copyLibs
60+
}
61+
5862
jar {
63+
dependsOn copyLibs
5964
exclude '**/whisper_java.exp', '**/whisper_java.lib'
6065
}
6166

@@ -67,6 +72,9 @@ tasks.withType(Test) {
6772
useJUnitPlatform()
6873
}
6974

75+
test.dependsOn copyLibs
76+
processResources.dependsOn copyLibs
77+
7078
dependencies {
7179
implementation "net.java.dev.jna:jna:5.13.0"
7280
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"

bindings/java/gradlew

100644100755
File mode changed.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package io.github.ggerganov.whispercpp;
2+
3+
/**
 * Presets for alignment heads in DTW token timestamps.
 *
 * <p>Each constant is the integer value passed to native code for the matching
 * entry of the C enum {@code whisper_alignment_heads_preset}. The values must
 * therefore follow the declaration order of that enum exactly.
 *
 * <p>NOTE(review): the ordering below (NONE, N_TOP_MOST, CUSTOM, then the
 * per-model presets) is taken from whisper.h; re-verify it whenever the native
 * enum changes.
 */
public class WhisperConstants {
    // Alignment heads presets, in native enum declaration order.
    public static final int WHISPER_AHEADS_NONE = 0;
    public static final int WHISPER_AHEADS_N_TOP_MOST = 1;
    public static final int WHISPER_AHEADS_CUSTOM = 2;
    public static final int WHISPER_AHEADS_TINY_EN = 3;
    public static final int WHISPER_AHEADS_TINY = 4;
    public static final int WHISPER_AHEADS_BASE_EN = 5;
    public static final int WHISPER_AHEADS_BASE = 6;
    public static final int WHISPER_AHEADS_SMALL_EN = 7;
    public static final int WHISPER_AHEADS_SMALL = 8;
    public static final int WHISPER_AHEADS_MEDIUM_EN = 9;
    public static final int WHISPER_AHEADS_MEDIUM = 10;
    public static final int WHISPER_AHEADS_LARGE_V1 = 11;
    public static final int WHISPER_AHEADS_LARGE_V2 = 12;
    public static final int WHISPER_AHEADS_LARGE_V3 = 13;
    public static final int WHISPER_AHEADS_LARGE_V3_TURBO = 14;

    /** Number of preset values defined above (0 through 14). */
    public static final int WHISPER_AHEADS_COUNT = 15;
}
Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,36 @@
11
package io.github.ggerganov.whispercpp;
22

3+
import com.sun.jna.NativeLong;
34
import com.sun.jna.Structure;
45
import com.sun.jna.ptr.PointerByReference;
6+
import com.sun.jna.Pointer;
57
import io.github.ggerganov.whispercpp.ggml.GgmlType;
68
import io.github.ggerganov.whispercpp.WhisperModel;
79
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
810

911
import java.util.List;
1012

1113
public class WhisperContext extends Structure {
12-
int t_load_us = 0;
13-
int t_start_us = 0;
14+
public NativeLong t_load_us;
15+
public NativeLong t_start_us;
1416

1517
/** weight type (FP32 / FP16 / QX) */
16-
GgmlType wtype = GgmlType.GGML_TYPE_F16;
18+
public GgmlType wtype = GgmlType.GGML_TYPE_F16;
1719
/** intermediate type (FP32 or FP16) */
18-
GgmlType itype = GgmlType.GGML_TYPE_F16;
20+
public GgmlType itype = GgmlType.GGML_TYPE_F16;
1921

20-
// WhisperModel model;
21-
public PointerByReference model;
22-
// whisper_vocab vocab;
23-
// whisper_state * state = nullptr;
24-
public PointerByReference vocab;
25-
public PointerByReference state;
22+
public WhisperContextParams.ByValue params;
23+
24+
public Pointer model;
25+
public Pointer vocab;
26+
public Pointer state;
2627

2728
/** populated by whisper_init_from_file_with_params() */
28-
String path_model;
29-
WhisperContextParams params;
30-
31-
// public static class ByReference extends WhisperContext implements Structure.ByReference {
32-
// }
33-
//
34-
// public static class ByValue extends WhisperContext implements Structure.ByValue {
35-
// }
36-
//
37-
// @Override
38-
// protected List<String> getFieldOrder() {
39-
// return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
40-
// }
29+
public Pointer path_model;
30+
31+
@Override
32+
protected List<String> getFieldOrder() {
33+
return List.of("t_load_us", "t_start_us", "wtype", "itype",
34+
"params", "model", "vocab", "state", "path_model");
35+
}
4136
}

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ public void initContext(String modelPath) throws FileNotFoundException {
4343
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
4444
* @param params - params to use when initialising the context
4545
*/
46-
public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
46+
public void initContext(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
4747
initContextImpl(modelPath, params);
4848
}
4949

50-
private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
50+
private void initContextImpl(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
5151
if (ctx != null) {
5252
lib.whisper_free(ctx);
5353
}
@@ -69,15 +69,13 @@ private void initContextImpl(String modelPath, WhisperContextParams params) thro
6969

7070
/**
7171
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
72-
* Because this function allocates memory for the params, the caller must call either:
73-
* - call `whisper_free_context_params()`
74-
* - `Native.free(Pointer.nativeValue(pointer));`
72+
* Returns a ByValue instance to ensure proper parameter passing to native code.
7573
*/
76-
public WhisperContextParams getContextDefaultParams() {
77-
paramsPointer = lib.whisper_context_default_params_by_ref();
78-
WhisperContextParams params = new WhisperContextParams(paramsPointer);
79-
params.read();
80-
return params;
74+
public WhisperContextParams.ByValue getContextDefaultParams() {
75+
WhisperContextParams.ByValue valueParams = new WhisperContextParams.ByValue(
76+
lib.whisper_context_default_params_by_ref());
77+
valueParams.read();
78+
return valueParams;
8179
}
8280

8381
/**
@@ -88,7 +86,7 @@ public WhisperContextParams getContextDefaultParams() {
8886
*
8987
* @param strategy - GREEDY
9088
*/
91-
public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
89+
public WhisperFullParams.ByValue getFullDefaultParams(WhisperSamplingStrategy strategy) {
9290
Pointer pointer;
9391

9492
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
@@ -104,7 +102,7 @@ public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy)
104102
pointer = beamParamsPointer;
105103
}
106104

107-
WhisperFullParams params = new WhisperFullParams(pointer);
105+
WhisperFullParams.ByValue params = new WhisperFullParams.ByValue(pointer);
108106
params.read();
109107
return params;
110108
}
@@ -138,15 +136,21 @@ private void freeParams() {
138136
}
139137

140138
/**
141-
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
139+
* Run the entire model: PCM -&gt; log mel spectrogram -&gt; encoder -&gt; decoder -&gt; text.
142140
* Not thread safe for same context
143141
* Uses the specified decoding strategy to obtain the text.
144142
*/
145-
public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
143+
public String fullTranscribe(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
146144
if (ctx == null) {
147145
throw new IllegalStateException("Model not initialised");
148146
}
149147

148+
/*
149+
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
150+
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
151+
valueParams.read();
152+
*/
153+
150154
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
151155
throw new IOException("Failed to process audio");
152156
}
@@ -163,12 +167,17 @@ public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData)
163167

164168
return str.toString().trim();
165169
}
170+
166171
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
167172
if (ctx == null) {
168173
throw new IllegalStateException("Model not initialised");
169174
}
170175

171-
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
176+
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
177+
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
178+
valueParams.read();
179+
180+
if (lib.whisper_full(ctx, valueParams, audioData, audioData.length) != 0) {
172181
throw new IOException("Failed to process audio");
173182
}
174183

bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public interface WhisperCppJnaLibrary extends Library {
3838
* @param params Pointer to whisper_context_params
3939
* @return Whisper context on success, null on failure
4040
*/
41-
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
41+
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams.ByValue params);
4242

4343
/**
4444
* Allocate (almost) all memory needed for the model by loading from a buffer.
@@ -180,12 +180,12 @@ public interface WhisperCppJnaLibrary extends Library {
180180
/**
181181
* @return the id of the specified language, returns -1 if not found.
182182
* Examples:
183-
* "de" -> 2
184-
* "german" -> 2
183+
* "de" -&gt; 2
184+
* "german" -&gt; 2
185185
*/
186186
int whisper_lang_id(String lang);
187187

188-
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
188+
/** @return the short string of the specified language id (e.g. 2 -&gt; "de"), returns nullptr if not found */
189189
String whisper_lang_str(int id);
190190

191191
/**
@@ -268,20 +268,21 @@ public interface WhisperCppJnaLibrary extends Library {
268268
void whisper_free_params(Pointer params);
269269

270270
/**
271-
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
271+
* Run the entire model: PCM -&gt; log mel spectrogram -&gt; encoder -&gt; decoder -&gt; text
272272
* Not thread safe for same context
273273
* Uses the specified decoding strategy to obtain the text.
274274
*/
275-
int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);
275+
int whisper_full(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples);
276276

277-
int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
277+
public int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams.ByValue params, float[] samples, int n_samples);
278+
//int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
278279

279280
// Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
280281
// Result is stored in the default state of the context
281282
// Not thread safe if executed in parallel on the same context.
282283
// It seems this approach can offer some speedup in some cases.
283284
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
284-
int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);
285+
int whisper_full_parallel(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples, int n_processors);
285286

286287
/**
287288
* Number of generated text segments.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package io.github.ggerganov.whispercpp.callbacks;
2+
3+
import com.sun.jna.Callback;
4+
5+
/**
6+
* Callback for aborting GGML computation
7+
* Maps to the C typedef: bool (*ggml_abort_callback)(void * data)
8+
*/
9+
public interface GgmlAbortCallback extends Callback {
10+
/**
11+
* Return true to abort the computation, false to continue
12+
*
13+
* @param data User data passed to the callback
14+
* @return true to abort, false to continue
15+
*/
16+
boolean invoke(com.sun.jna.Pointer data);
17+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package io.github.ggerganov.whispercpp.params;
2+
import com.sun.jna.*;
3+
import java.util.Arrays;
4+
import java.util.List;
5+
6+
public class WhisperAhead extends Structure {
7+
8+
public int n_text_layer;
9+
10+
public int n_head;
11+
12+
public WhisperAhead() {
13+
super();
14+
}
15+
16+
public WhisperAhead(int textLayer, int head) {
17+
super();
18+
this.n_text_layer = textLayer;
19+
this.n_head = head;
20+
}
21+
22+
@Override
23+
protected List<String> getFieldOrder() {
24+
return Arrays.asList("n_text_layer", "n_head");
25+
}
26+
27+
public static class ByReference extends WhisperAhead implements Structure.ByReference {}
28+
29+
public static class ByValue extends WhisperAhead implements Structure.ByValue {}
30+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package io.github.ggerganov.whispercpp.params;
2+
import com.sun.jna.*;
3+
import java.util.Arrays;
4+
import java.util.List;
5+
6+
public class WhisperAheads extends Structure {
7+
public NativeLong n_heads;
8+
9+
public Pointer heads;
10+
11+
public WhisperAheads() {
12+
super();
13+
}
14+
15+
/**
16+
* Create alignment heads from an array of WhisperAhead objects
17+
*/
18+
public void setHeads(WhisperAhead[] aheadsArray) {
19+
this.n_heads = new NativeLong(aheadsArray.length);
20+
21+
int structSize = aheadsArray[0].size();
22+
Memory mem = new Memory(structSize * aheadsArray.length);
23+
24+
for (int i = 0; i < aheadsArray.length; i++) {
25+
aheadsArray[i].write();
26+
byte[] buffer = aheadsArray[i].getPointer().getByteArray(0, structSize);
27+
mem.write(i * structSize, buffer, 0, buffer.length);
28+
}
29+
30+
this.heads = mem;
31+
}
32+
33+
@Override
34+
protected List<String> getFieldOrder() {
35+
return Arrays.asList("n_heads", "heads");
36+
}
37+
38+
public static class ByReference extends WhisperAheads implements Structure.ByReference {}
39+
40+
public static class ByValue extends WhisperAheads implements Structure.ByValue {}
41+
}

0 commit comments

Comments
 (0)