
Commit b9593fe

Remove redundant numGQA option from Ollama options
1 parent 5642521 commit b9593fe

File tree

3 files changed (+8, -31 lines):
- models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java
- spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc
- spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java

Lines changed: 8 additions & 29 deletions
@@ -71,12 +71,6 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed
 	 */
 	@JsonProperty("num_batch") private Integer numBatch;
 
-	/**
-	 * The number of GQA groups in the transformer layer. Required for some models,
-	 * for example it is 8 for llama2:70b.
-	 */
-	@JsonProperty("num_gqa") private Integer numGQA;
-
 	/**
 	 * The number of layers to send to the GPU(s). On macOS, it defaults to 1
 	 * to enable metal support, 0 to disable.
@@ -327,11 +321,6 @@ public OllamaOptions withNumBatch(Integer numBatch) {
 		return this;
 	}
 
-	public OllamaOptions withNumGQA(Integer numGQA) {
-		this.numGQA = numGQA;
-		return this;
-	}
-
 	public OllamaOptions withNumGPU(Integer numGPU) {
 		this.numGPU = numGPU;
 		return this;
@@ -518,14 +507,6 @@ public void setNumBatch(Integer numBatch) {
 		this.numBatch = numBatch;
 	}
 
-	public Integer getNumGQA() {
-		return this.numGQA;
-	}
-
-	public void setNumGQA(Integer numGQA) {
-		this.numGQA = numGQA;
-	}
-
 	public Integer getNumGPU() {
 		return this.numGPU;
 	}
@@ -795,7 +776,6 @@ public static OllamaOptions fromOptions(OllamaOptions fromOptions) {
 			.withUseNUMA(fromOptions.getUseNUMA())
 			.withNumCtx(fromOptions.getNumCtx())
 			.withNumBatch(fromOptions.getNumBatch())
-			.withNumGQA(fromOptions.getNumGQA())
 			.withNumGPU(fromOptions.getNumGPU())
 			.withMainGPU(fromOptions.getMainGPU())
 			.withLowVRAM(fromOptions.getLowVRAM())
@@ -837,14 +817,13 @@ public boolean equals(Object o) {
 		return Objects.equals(model, that.model) && Objects.equals(format, that.format)
 				&& Objects.equals(keepAlive, that.keepAlive) && Objects.equals(useNUMA, that.useNUMA)
 				&& Objects.equals(numCtx, that.numCtx) && Objects.equals(numBatch, that.numBatch)
-				&& Objects.equals(numGQA, that.numGQA) && Objects.equals(numGPU, that.numGPU)
-				&& Objects.equals(mainGPU, that.mainGPU) && Objects.equals(lowVRAM, that.lowVRAM)
-				&& Objects.equals(f16KV, that.f16KV) && Objects.equals(logitsAll, that.logitsAll)
-				&& Objects.equals(vocabOnly, that.vocabOnly) && Objects.equals(useMMap, that.useMMap)
-				&& Objects.equals(useMLock, that.useMLock) && Objects.equals(numThread, that.numThread)
-				&& Objects.equals(numKeep, that.numKeep) && Objects.equals(seed, that.seed)
-				&& Objects.equals(numPredict, that.numPredict) && Objects.equals(topK, that.topK)
-				&& Objects.equals(topP, that.topP) && Objects.equals(tfsZ, that.tfsZ)
+				&& Objects.equals(numGPU, that.numGPU) && Objects.equals(mainGPU, that.mainGPU)
+				&& Objects.equals(lowVRAM, that.lowVRAM) && Objects.equals(f16KV, that.f16KV)
+				&& Objects.equals(logitsAll, that.logitsAll) && Objects.equals(vocabOnly, that.vocabOnly)
+				&& Objects.equals(useMMap, that.useMMap) && Objects.equals(useMLock, that.useMLock)
+				&& Objects.equals(numThread, that.numThread) && Objects.equals(numKeep, that.numKeep)
+				&& Objects.equals(seed, that.seed) && Objects.equals(numPredict, that.numPredict)
+				&& Objects.equals(topK, that.topK) && Objects.equals(topP, that.topP) && Objects.equals(tfsZ, that.tfsZ)
 				&& Objects.equals(typicalP, that.typicalP) && Objects.equals(repeatLastN, that.repeatLastN)
 				&& Objects.equals(temperature, that.temperature) && Objects.equals(repeatPenalty, that.repeatPenalty)
 				&& Objects.equals(presencePenalty, that.presencePenalty)
@@ -858,7 +837,7 @@ public boolean equals(Object o) {
 	@Override
 	public int hashCode() {
 		return Objects.hash(this.model, this.format, this.keepAlive, this.useNUMA, this.numCtx, this.numBatch,
-				this.numGQA, numGPU, mainGPU, lowVRAM, this.f16KV, this.logitsAll, this.vocabOnly, this.useMMap,
+				this.numGPU, this.mainGPU, lowVRAM, this.f16KV, this.logitsAll, this.vocabOnly, this.useMMap,
 				this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict, this.topK, this.topP, tfsZ,
 				this.typicalP, this.repeatLastN, this.temperature, this.repeatPenalty, this.presencePenalty,
 				this.frequencyPenalty, this.mirostat, this.mirostatTau, this.mirostatEta, this.penalizeNewline,
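For code that configured numGQA, migration is just deleting the call: the builder chain now runs straight from withNumBatch to withNumGPU. A minimal sketch using only methods visible in this diff (the option values are illustrative, and the public no-arg constructor is an assumption):

import org.springframework.ai.ollama.api.OllamaOptions;

public class OptionsAfterRemoval {

	public static void main(String[] args) {
		// Before b9593fe a .withNumGQA(8) call could sit between withNumBatch
		// and withNumGPU; after this commit it no longer compiles.
		OllamaOptions options = new OllamaOptions() // assumes the no-arg constructor is public
			.withNumCtx(2048)   // context window size (docs default)
			.withNumBatch(512)  // batch size (docs default for chat)
			.withNumGPU(-1);    // -1 lets Ollama pick the GPU layer count dynamically

		System.out.println(options.getNumGPU()); // -1
	}
}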

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc

Lines changed: 0 additions & 1 deletion
@@ -75,7 +75,6 @@ The remaining `options` properties are based on the link:https://github.com/olla
 | spring.ai.ollama.chat.options.numa | Whether to use NUMA. | false
 | spring.ai.ollama.chat.options.num-ctx | Sets the size of the context window used to generate the next token. | 2048
 | spring.ai.ollama.chat.options.num-batch | ??? | 512
-| spring.ai.ollama.chat.options.num-gqa | The number of GQA groups in the transformer layer. Required for some models, for example, it is 8 for llama2:70b. | 1
 | spring.ai.ollama.chat.options.num-gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. 1 here indicates that NumGPU should be set dynamically | -1
 | spring.ai.ollama.chat.options.main-gpu | ??? | -
 | spring.ai.ollama.chat.options.low-vram | ??? | false
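Each spring.ai.ollama.chat.options.* row in this table binds to the matching OllamaOptions field, so the surviving defaults can also be set programmatically. A sketch of that mapping, again assuming only the builder methods shown in the Java diff above (the property-to-field pairing in the comments is inferred from the table):

import org.springframework.ai.ollama.api.OllamaOptions;

public class ChatOptionDefaults {

	public static void main(String[] args) {
		// Programmatic twin of the chat options table after this commit;
		// num-gqa no longer has a counterpart.
		OllamaOptions chatDefaults = new OllamaOptions()
			.withUseNUMA(false)  // spring.ai.ollama.chat.options.numa
			.withNumCtx(2048)    // spring.ai.ollama.chat.options.num-ctx
			.withNumBatch(512)   // spring.ai.ollama.chat.options.num-batch
			.withNumGPU(-1)      // spring.ai.ollama.chat.options.num-gpu
			.withLowVRAM(false); // spring.ai.ollama.chat.options.low-vram

		System.out.println(chatDefaults.getNumCtx()); // 2048
	}
}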

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc

Lines changed: 0 additions & 1 deletion
@@ -78,7 +78,6 @@ The remaining `options` properties are based on the link:https://github.com/olla
 | spring.ai.ollama.embedding.options.numa | Whether to use NUMA. | false
 | spring.ai.ollama.embedding.options.num-ctx | Sets the size of the context window used to generate the next token. | 2048
 | spring.ai.ollama.embedding.options.num-batch | ??? | -
-| spring.ai.ollama.embedding.options.num-gqa | The number of GQA groups in the transformer layer. Required for some models, for example, it is 8 for llama2:70b. | -
 | spring.ai.ollama.embedding.options.num-gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. | -
 | spring.ai.ollama.embedding.options.main-gpu | ??? | -
 | spring.ai.ollama.embedding.options.low-vram | ??? | -
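fromOptions, whose builder chain lost its withNumGQA line in the first file, is the copy factory for deriving per-request settings from shared defaults. A sketch restricted to methods this diff confirms:

import org.springframework.ai.ollama.api.OllamaOptions;

public class PerRequestOverride {

	public static void main(String[] args) {
		OllamaOptions shared = new OllamaOptions()
			.withNumCtx(2048)
			.withNumGPU(-1);

		// fromOptions copies every remaining field (its numGQA line was
		// deleted in this commit), so the copy can be tweaked without
		// mutating the shared instance.
		OllamaOptions perRequest = OllamaOptions.fromOptions(shared)
			.withNumCtx(4096);

		System.out.println(shared.getNumCtx());     // 2048
		System.out.println(perRequest.getNumCtx()); // 4096
	}
}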
