Skip to content

Tool calling fails on vLLM server 0.9.1 #3534

@lanyuanxiaoyao

Description

@lanyuanxiaoyao

Bug description
The same tool-calling code fails after the vLLM server is upgraded from 0.8.5.post1 to 0.9.1.

Environment
Spring AI 1.0.0

Steps to reproduce

# Launch the vllm-server-cpu:0.8.5.post1 image. Everything after the image name
# is passed to the container as its command arguments. Every line except the
# last ends with a backslash so the shell reads this as one single command.
docker run \
      --rm \
      -p 3000:3000 \
      -v /data/models:/models \
      --name vllm-qwen3-4b \
      --privileged=true \
      --shm-size=4g \
      -e VLLM_CPU_KVCACHE_SPACE=8 \
      vllm-server-cpu:0.8.5.post1 \
      --model /models/Qwen3-4B \
      --served-model-name Qwen3/qwen3-4b \
      --port 3000 \
      --enable-auto-tool-choice \
      --tool-call-parser hermes \
      --enable-reasoning \
      --reasoning-parser deepseek_r1
package com.lanyuanxiaoyao.service.ai.chat;

import cn.hutool.core.util.StrUtil;
import java.net.http.HttpClient;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.openai.OpenAiChatModel;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.ai.tool.annotation.Tool;
import org.springframework.ai.tool.annotation.ToolParam;
import org.springframework.http.client.JdkClientHttpRequestFactory;
import org.springframework.http.client.reactive.JdkClientHttpConnector;
import org.springframework.web.client.RestClient;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.Disposable;

/**
 * Minimal reproduction of a streaming tool call issued through Spring AI's
 * {@link ChatClient}, backed by {@link OpenAiChatModel} pointed at a custom base URL.
 *
 * <p>Both the blocking ({@link RestClient}) and reactive ({@link WebClient}) transports
 * are backed by JDK {@link HttpClient} instances pinned to HTTP/1.1.
 *
 * @author lanyuanxiaoyao
 * @version 20250613
 */
public class TestSpringAiTools {
  /**
   * Builds the client, registers {@link TestTool}, sends one user prompt and prints every
   * streamed content chunk to standard output.
   *
   * <p>The calling thread is parked until the stream terminates. If the stream ends with an
   * error, that error is rethrown from this method rather than being left to a subscriber
   * that has no error handler.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    ChatClient client = ChatClient.builder(
            OpenAiChatModel.builder()
                .openAiApi(
                    OpenAiApi.builder()
                        .baseUrl("http://xxxx:xxx")
                        .restClientBuilder(restClientBuilder())
                        .webClientBuilder(webClientBuilder())
                        .build()
                )
                .defaultOptions(
                    OpenAiChatOptions.builder()
                        .model("Qwen3/qwen3-1.7b")
                        .build()
                )
                .build()
        )
        .build();
    // blockLast() waits for completion without spinning; the previous empty
    // while-loop on isDisposed() kept one CPU core busy for the whole request.
    client.prompt()
        .tools(new TestTool())
        .user("Call the function 'submit' to generate a joke with 'pig'")
        .stream()
        .content()
        .doOnNext(System.out::println)
        .blockLast();
  }

  /**
   * Creates a JDK HTTP client restricted to HTTP/1.1.
   *
   * @return a new {@link HttpClient}; each call builds a separate instance
   */
  private static HttpClient httpClient() {
    return HttpClient.newBuilder()
        .version(HttpClient.Version.HTTP_1_1)
        .build();
  }

  /**
   * Creates the builder for the blocking transport.
   *
   * @return a {@link RestClient.Builder} whose request factory wraps {@link #httpClient()}
   */
  private static RestClient.Builder restClientBuilder() {
    return RestClient.builder()
        .requestFactory(new JdkClientHttpRequestFactory(httpClient()));
  }

  /**
   * Creates the builder for the reactive (streaming) transport.
   *
   * @return a {@link WebClient.Builder} whose connector wraps {@link #httpClient()}
   */
  private static WebClient.Builder webClientBuilder() {
    return WebClient.builder()
        .clientConnector(new JdkClientHttpConnector(httpClient()));
  }

  /** Tool holder exposing a single {@code submit} function to the model. */
  public static final class TestTool {
    /**
     * Produces a canned one-line joke about the given animal.
     *
     * @param animalName name of the animal to insert into the joke
     * @return the text {@code "<animalName> is falling into a hole"}
     */
    @Tool(description = "Input a name of animal and return a joke for it")
    public String submit(@ToolParam(description = "Name of Animal") String animalName) {
      return StrUtil.format("{} is falling into a hole", animalName);
    }
  }
}

Exception:

2025-06-13 17:40:22.741 ERROR [b12s9] [HttpClient-2-Worker-3] org.springframework.ai.chat.model.MessageAggregator #@# Aggregation Error

java.util.NoSuchElementException: null
	at java.base/java.util.ArrayList$Itr.next(ArrayList.java:970)
	at org.springframework.ai.deepseek.api.DeepSeekStreamFunctionCallingHelper.merge(DeepSeekStreamFunctionCallingHelper.java:97)
	at org.springframework.ai.deepseek.api.DeepSeekStreamFunctionCallingHelper.merge(DeepSeekStreamFunctionCallingHelper.java:71)
	at org.springframework.ai.deepseek.api.DeepSeekStreamFunctionCallingHelper.merge(DeepSeekStreamFunctionCallingHelper.java:57)
	at org.springframework.ai.deepseek.api.DeepSeekApi.lambda$chatCompletionStream$5(DeepSeekApi.java:188)
	at reactor.core.publisher.MonoReduceSeed$ReduceSeedSubscriber.onNext(MonoReduceSeed.java:116)
	at reactor.core.publisher.FluxWindowPredicate$WindowFlux.drainRegular(FluxWindowPredicate.java:670)
	at reactor.core.publisher.FluxWindowPredicate$WindowFlux.drain(FluxWindowPredicate.java:748)
	at reactor.core.publisher.FluxWindowPredicate$WindowFlux.onNext(FluxWindowPredicate.java:790)
	at reactor.core.publisher.FluxWindowPredicate$WindowPredicateMain.onNext(FluxWindowPredicate.java:241)
	at reactor.core.publisher.FluxMap$MapSubscriber.onNext(FluxMap.java:122)
	at reactor.core.publisher.FluxMap$MapSubscriber.onNext(FluxMap.java:122)
	at reactor.core.publisher.FluxFilter$FilterSubscriber.onNext(FluxFilter.java:113)
	at reactor.core.publisher.FluxTakeUntil$TakeUntilPredicateSubscriber.onNext(FluxTakeUntil.java:95)
	at reactor.core.publisher.MonoFlatMapMany$FlatMapManyInner.onNext(MonoFlatMapMany.java:251)
	at reactor.core.publisher.FluxOnErrorResume$ResumeSubscriber.onNext(FluxOnErrorResume.java:79)
	at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.onNext(FluxOnAssembly.java:539)
	at reactor.core.publisher.FluxConcatMapNoPrefetch$FluxConcatMapNoPrefetchSubscriber.innerNext(FluxConcatMapNoPrefetch.java:259)
	at reactor.core.publisher.FluxConcatMap$ConcatMapInner.onNext(FluxConcatMap.java:865)
	at reactor.core.publisher.FluxConcatMap$WeakScalarSubscription.request(FluxConcatMap.java:480)
	at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.set(Operators.java:2367)
	at reactor.core.publisher.FluxConcatMapNoPrefetch$FluxConcatMapNoPrefetchSubscriber.onNext(FluxConcatMapNoPrefetch.java:202)
	at reactor.core.publisher.FluxBufferPredicate$BufferPredicateSubscriber.onNextNewBuffer(FluxBufferPredicate.java:317)
	at reactor.core.publisher.FluxBufferPredicate$BufferPredicateSubscriber.tryOnNext(FluxBufferPredicate.java:227)
	at reactor.core.publisher.FluxBufferPredicate$BufferPredicateSubscriber.onNext(FluxBufferPredicate.java:200)
	at reactor.core.publisher.FluxPeekFuseable$PeekFuseableConditionalSubscriber.onNext(FluxPeekFuseable.java:503)
	at reactor.core.publisher.FluxMapFuseable$MapFuseableConditionalSubscriber.onNext(FluxMapFuseable.java:299)
	at reactor.core.publisher.FluxContextWrite$ContextWriteSubscriber.onNext(FluxContextWrite.java:107)
	at reactor.core.publisher.FluxDoFinally$DoFinallySubscriber.onNext(FluxDoFinally.java:113)
	at reactor.core.publisher.FluxConcatArray$ConcatArraySubscriber.onNext(FluxConcatArray.java:180)
	at reactor.core.publisher.FluxFlattenIterable$FlattenIterableSubscriber.drainAsync(FluxFlattenIterable.java:453)
	at reactor.core.publisher.FluxFlattenIterable$FlattenIterableSubscriber.drain(FluxFlattenIterable.java:724)
	at reactor.core.publisher.FluxFlattenIterable$FlattenIterableSubscriber.onNext(FluxFlattenIterable.java:256)
	at reactor.core.publisher.FluxPublish$PublishSubscriber.drain(FluxPublish.java:571)
	at reactor.core.publisher.FluxPublish$PublishSubscriber.onNext(FluxPublish.java:310)
	at reactor.core.publisher.FluxContextWrite$ContextWriteSubscriber.onNext(FluxContextWrite.java:107)
	at reactor.core.publisher.FluxMapFuseable$MapFuseableConditionalSubscriber.onNext(FluxMapFuseable.java:299)
	at reactor.core.publisher.FluxFlattenIterable$FlattenIterableSubscriber.drainAsync(FluxFlattenIterable.java:453)
	at reactor.core.publisher.FluxFlattenIterable$FlattenIterableSubscriber.drain(FluxFlattenIterable.java:724)
	at reactor.core.publisher.FluxFlattenIterable$FlattenIterableSubscriber.onNext(FluxFlattenIterable.java:256)
	at reactor.adapter.JdkFlowAdapter$SubscriberToRS.onNext(JdkFlowAdapter.java:150)
	at java.net.http/jdk.internal.net.http.ResponseSubscribers$PublishingBodySubscriber.onNext(ResponseSubscribers.java:1006)
	at java.net.http/jdk.internal.net.http.ResponseSubscribers$PublishingBodySubscriber.onNext(ResponseSubscribers.java:846)
	at java.net.http/jdk.internal.net.http.Http1Response$Http1BodySubscriber.onNext(Http1Response.java:382)
	at java.net.http/jdk.internal.net.http.Http1Response$Http1BodySubscriber.onNext(Http1Response.java:297)
	at java.net.http/jdk.internal.net.http.ResponseContent$ChunkedBodyParser.accept(ResponseContent.java:229)
	at java.net.http/jdk.internal.net.http.ResponseContent$ChunkedBodyParser.accept(ResponseContent.java:129)
	at java.net.http/jdk.internal.net.http.Http1Response$BodyReader.handle(Http1Response.java:790)
	at java.net.http/jdk.internal.net.http.Http1Response$BodyReader.handle(Http1Response.java:720)
	at java.net.http/jdk.internal.net.http.Http1Response$Receiver.accept(Http1Response.java:612)
	at java.net.http/jdk.internal.net.http.Http1Response$BodyReader.tryAsyncReceive(Http1Response.java:750)
	at java.net.http/jdk.internal.net.http.Http1AsyncReceiver.flush(Http1AsyncReceiver.java:233)
	at java.net.http/jdk.internal.net.http.common.SequentialScheduler$LockingRestartableTask.run(SequentialScheduler.java:205)
	at java.net.http/jdk.internal.net.http.common.SequentialScheduler$CompleteRestartableTask.run(SequentialScheduler.java:149)
	at java.net.http/jdk.internal.net.http.common.SequentialScheduler$SchedulableTask.run(SequentialScheduler.java:230)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.base/java.lang.Thread.run(Thread.java:840)

Of course, it works fine on vLLM 0.8.5.post1; maybe something changed in 0.9.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions