update llm pipeline PR (livepeer#3336)

ad-astra-video · ad-astra-video · commit 601d30c244da · 2025-01-16T07:45:12.000-06:00
diff --git a/core/ai.go b/core/ai.go
@@ -23,7 +23,7 @@ type AI interface {
 	ImageToVideo(context.Context, worker.GenImageToVideoMultipartRequestBody) (*worker.VideoResponse, error)
 	Upscale(context.Context, worker.GenUpscaleMultipartRequestBody) (*worker.ImageResponse, error)
 	AudioToText(context.Context, worker.GenAudioToTextMultipartRequestBody) (*worker.TextResponse, error)
-	LLM(context.Context, worker.GenLLMFormdataRequestBody) (interface{}, error)
+	LLM(context.Context, worker.GenLLMJSONRequestBody) (interface{}, error)
 	SegmentAnything2(context.Context, worker.GenSegmentAnything2MultipartRequestBody) (*worker.MasksResponse, error)
 	ImageToText(context.Context, worker.GenImageToTextMultipartRequestBody) (*worker.ImageToTextResponse, error)
 	TextToSpeech(context.Context, worker.GenTextToSpeechJSONRequestBody) (*worker.AudioResponse, error)
diff --git a/core/ai_test.go b/core/ai_test.go
@@ -690,8 +690,11 @@ func (a *stubAIWorker) SegmentAnything2(ctx context.Context, req worker.GenSegme
 	return &worker.MasksResponse{Logits: "logits", Masks: "masks", Scores: "scores"}, nil
 }
 
-func (a *stubAIWorker) LLM(ctx context.Context, req worker.GenLLMFormdataRequestBody) (interface{}, error) {
-	return &worker.LLMResponse{Response: "response tokens", TokensUsed: 10}, nil
+func (a *stubAIWorker) LLM(ctx context.Context, req worker.GenLLMJSONRequestBody) (interface{}, error) {
+	var choices []worker.LLMChoice
+	choices = append(choices, worker.LLMChoice{Delta: &worker.LLMMessage{Content: "choice1", Role: "assistant"}, Index: 0})
+	tokensUsed := worker.LLMTokenUsage{PromptTokens: 40, CompletionTokens: 10, TotalTokens: 50}
+	return &worker.LLMResponse{Choices: choices, Created: 1, Model: "llm_model", TokensUsed: tokensUsed}, nil
 }
 
 func (a *stubAIWorker) ImageToText(ctx context.Context, req worker.GenImageToTextMultipartRequestBody) (*worker.ImageToTextResponse, error) {
diff --git a/core/ai_worker.go b/core/ai_worker.go
@@ -824,14 +824,14 @@ func (orch *orchestrator) SegmentAnything2(ctx context.Context, requestID string
 }
 
 // Return type is LLMResponse, but a stream is available as well as chan(string)
-func (orch *orchestrator) LLM(ctx context.Context, requestID string, req worker.GenLLMFormdataRequestBody) (interface{}, error) {
+func (orch *orchestrator) LLM(ctx context.Context, requestID string, req worker.GenLLMJSONRequestBody) (interface{}, error) {
 	// local AIWorker processes job if combined orchestrator/ai worker
 	if orch.node.AIWorker != nil {
 		// no file response to save, response is text sent back to gateway
 		return orch.node.AIWorker.LLM(ctx, req)
 	}
 
-	res, err := orch.node.AIWorkerManager.Process(ctx, requestID, "llm", *req.ModelId, "", AIJobRequestData{Request: req})
+	res, err := orch.node.AIWorkerManager.Process(ctx, requestID, "llm", *req.Model, "", AIJobRequestData{Request: req})
 	if err != nil {
 		return nil, err
 	}
@@ -842,7 +842,7 @@ func (orch *orchestrator) LLM(ctx context.Context, requestID string, req worker.
 		if err != nil {
 			clog.Errorf(ctx, "Error saving remote ai result err=%q", err)
 			if monitor.Enabled {
-				monitor.AIResultSaveError(ctx, "llm", *req.ModelId, string(monitor.SegmentUploadErrorUnknown))
+				monitor.AIResultSaveError(ctx, "llm", *req.Model, string(monitor.SegmentUploadErrorUnknown))
 			}
 			return nil, err
 
@@ -1087,7 +1087,7 @@ func (n *LivepeerNode) SegmentAnything2(ctx context.Context, req worker.GenSegme
 	return n.AIWorker.SegmentAnything2(ctx, req)
 }
 
-func (n *LivepeerNode) LLM(ctx context.Context, req worker.GenLLMFormdataRequestBody) (interface{}, error) {
+func (n *LivepeerNode) LLM(ctx context.Context, req worker.GenLLMJSONRequestBody) (interface{}, error) {
 	return n.AIWorker.LLM(ctx, req)
 }
 
diff --git a/go.mod b/go.mod
@@ -14,7 +14,7 @@ require (
 	github.com/google/uuid v1.6.0
 	github.com/jaypipes/ghw v0.10.0
 	github.com/jaypipes/pcidb v1.0.0
-	github.com/livepeer/ai-worker v0.12.7-0.20241219141308-c19289d128a3
+	github.com/livepeer/ai-worker v0.13.1
 	github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b
 	github.com/livepeer/livepeer-data v0.7.5-0.20231004073737-06f1f383fb18
 	github.com/livepeer/lpms v0.0.0-20241203012405-fc96cadb6393
diff --git a/go.sum b/go.sum
@@ -607,8 +607,8 @@ github.com/libp2p/go-netroute v0.2.0 h1:0FpsbsvuSnAhXFnCY0VLFbJOzaK0VnP0r1QT/o4n
 github.com/libp2p/go-netroute v0.2.0/go.mod h1:Vio7LTzZ+6hoT4CMZi5/6CpY3Snzh2vgZhWgxMNwlQI=
 github.com/libp2p/go-openssl v0.1.0 h1:LBkKEcUv6vtZIQLVTegAil8jbNpJErQ9AnT+bWV+Ooo=
 github.com/libp2p/go-openssl v0.1.0/go.mod h1:OiOxwPpL3n4xlenjx2h7AwSGaFSC/KZvf6gNdOBQMtc=
-github.com/livepeer/ai-worker v0.12.7-0.20241219141308-c19289d128a3 h1:uutmGZq2YdIKnKhn6QGHtGnKfBGYAUMMOr44LXYs23w=
-github.com/livepeer/ai-worker v0.12.7-0.20241219141308-c19289d128a3/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM=
+github.com/livepeer/ai-worker v0.13.1 h1:BnqzmBD/E5gHM0P6UXt9M2/bZwU3ZryEfNpbW+NYJr0=
+github.com/livepeer/ai-worker v0.13.1/go.mod h1:ZibfmZQQh6jFvnPLHeIPInghfX5ln+JpN845nS3GuyM=
 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b h1:VQcnrqtCA2UROp7q8ljkh2XA/u0KRgVv0S1xoUvOweE=
 github.com/livepeer/go-tools v0.3.6-0.20240130205227-92479de8531b/go.mod h1:hwJ5DKhl+pTanFWl+EUpw1H7ukPO/H+MFpgA7jjshzw=
 github.com/livepeer/joy4 v0.1.2-0.20191121080656-b2fea45cbded h1:ZQlvR5RB4nfT+cOQee+WqmaDOgGtP2oDMhcVvR4L0yA=
diff --git a/server/ai_http.go b/server/ai_http.go
@@ -66,7 +66,7 @@ func startAIServer(lp *lphttp) error {
 	lp.transRPC.Handle("/image-to-video", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenImageToVideoMultipartRequestBody])))
 	lp.transRPC.Handle("/upscale", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenUpscaleMultipartRequestBody])))
 	lp.transRPC.Handle("/audio-to-text", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenAudioToTextMultipartRequestBody])))
-	lp.transRPC.Handle("/llm", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenLLMFormdataRequestBody])))
+	lp.transRPC.Handle("/llm", oapiReqValidator(aiHttpHandle(lp, jsonDecoder[worker.GenLLMJSONRequestBody])))
 	lp.transRPC.Handle("/segment-anything-2", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenSegmentAnything2MultipartRequestBody])))
 	lp.transRPC.Handle("/image-to-text", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenImageToTextMultipartRequestBody])))
 	lp.transRPC.Handle("/text-to-speech", oapiReqValidator(aiHttpHandle(lp, jsonDecoder[worker.GenTextToSpeechJSONRequestBody])))
@@ -405,10 +405,10 @@ func handleAIRequest(ctx context.Context, w http.ResponseWriter, r *http.Request
 			return
 		}
 		outPixels *= 1000 // Convert to milliseconds
-	case worker.GenLLMFormdataRequestBody:
+	case worker.GenLLMJSONRequestBody:
 		pipeline = "llm"
 		cap = core.Capability_LLM
-		modelID = *v.ModelId
+		modelID = *v.Model
 		submitFn = func(ctx context.Context) (interface{}, error) {
 			return orch.LLM(ctx, requestID, v)
 		}
@@ -586,7 +586,7 @@ func handleAIRequest(ctx context.Context, w http.ResponseWriter, r *http.Request
 	}
 
 	// Check if the response is a streaming response
-	if streamChan, ok := resp.(<-chan worker.LlmStreamChunk); ok {
+	if streamChan, ok := resp.(<-chan *worker.LLMResponse); ok {
 		glog.Infof("Streaming response for request id=%v", requestID)
 
 		// Set headers for SSE
@@ -610,7 +610,7 @@ func handleAIRequest(ctx context.Context, w http.ResponseWriter, r *http.Request
 			fmt.Fprintf(w, "data: %s\n\n", data)
 			flusher.Flush()
 
-			if chunk.Done {
+			if chunk.Choices[0].FinishReason != nil && *chunk.Choices[0].FinishReason != "" {
 				break
 			}
 		}
@@ -685,8 +685,8 @@ func (h *lphttp) AIResults() http.Handler {
 		case "text/event-stream":
 			resultType = "streaming"
 			glog.Infof("Received %s response from remote worker=%s taskId=%d", resultType, r.RemoteAddr, tid)
-			resChan := make(chan worker.LlmStreamChunk, 100)
-			workerResult.Results = (<-chan worker.LlmStreamChunk)(resChan)
+			resChan := make(chan *worker.LLMResponse, 100)
+			workerResult.Results = (<-chan *worker.LLMResponse)(resChan)
 
 			defer r.Body.Close()
 			defer close(resChan)
@@ -705,12 +705,12 @@ func (h *lphttp) AIResults() http.Handler {
 					line := scanner.Text()
 					if strings.HasPrefix(line, "data: ") {
 						data := strings.TrimPrefix(line, "data: ")
-						var chunk worker.LlmStreamChunk
+						var chunk worker.LLMResponse
 						if err := json.Unmarshal([]byte(data), &chunk); err != nil {
 							clog.Errorf(ctx, "Error unmarshaling stream data: %v", err)
 							continue
 						}
-						resChan <- chunk
+						resChan <- &chunk
 					}
 				}
 			}
diff --git a/server/ai_mediaserver.go b/server/ai_mediaserver.go
@@ -259,20 +259,19 @@ func (ls *LivepeerServer) LLM() http.Handler {
 		requestID := string(core.RandomManifestID())
 		ctx = clog.AddVal(ctx, "request_id", requestID)
 
-		var req worker.GenLLMFormdataRequestBody
-
-		multiRdr, err := r.MultipartReader()
-		if err != nil {
+		var req worker.GenLLMJSONRequestBody
+		if err := jsonDecoder(&req, r); err != nil {
 			respondJsonError(ctx, w, err, http.StatusBadRequest)
 			return
 		}
 
-		if err := runtime.BindMultipart(&req, *multiRdr); err != nil {
-			respondJsonError(ctx, w, err, http.StatusBadRequest)
+		//check required fields
+		if req.Model == nil || req.Messages == nil || req.Stream == nil || req.MaxTokens == nil || len(req.Messages) == 0 {
+			respondJsonError(ctx, w, errors.New("missing required fields"), http.StatusBadRequest)
 			return
 		}
 
-		clog.V(common.VERBOSE).Infof(ctx, "Received LLM request prompt=%v model_id=%v stream=%v", req.Prompt, *req.ModelId, *req.Stream)
+		clog.V(common.VERBOSE).Infof(ctx, "Received LLM request model_id=%v stream=%v", *req.Model, *req.Stream)
 
 		orchAddr := r.Header.Get("OrchAddr")
 		params := aiRequestParams{
@@ -295,9 +294,9 @@ func (ls *LivepeerServer) LLM() http.Handler {
 		}
 
 		took := time.Since(start)
-		clog.V(common.VERBOSE).Infof(ctx, "Processed LLM request prompt=%v model_id=%v took=%v", req.Prompt, *req.ModelId, took)
+		clog.V(common.VERBOSE).Infof(ctx, "Processed LLM request model_id=%v took=%v", *req.Model, took)
 
-		if streamChan, ok := resp.(chan worker.LlmStreamChunk); ok {
+		if streamChan, ok := resp.(chan *worker.LLMResponse); ok {
 			// Handle streaming response (SSE)
 			w.Header().Set("Content-Type", "text/event-stream")
 			w.Header().Set("Cache-Control", "no-cache")
@@ -307,7 +306,7 @@ func (ls *LivepeerServer) LLM() http.Handler {
 				data, _ := json.Marshal(chunk)
 				fmt.Fprintf(w, "data: %s\n\n", data)
 				w.(http.Flusher).Flush()
-				if chunk.Done {
+				if chunk.Choices[0].FinishReason != nil && *chunk.Choices[0].FinishReason != "" {
 					break
 				}
 			}
diff --git a/server/ai_process.go b/server/ai_process.go
@@ -1100,14 +1100,14 @@ func CalculateLLMLatencyScore(took time.Duration, tokensUsed int) float64 {
 	return took.Seconds() / float64(tokensUsed)
 }
 
-func processLLM(ctx context.Context, params aiRequestParams, req worker.GenLLMFormdataRequestBody) (interface{}, error) {
+func processLLM(ctx context.Context, params aiRequestParams, req worker.GenLLMJSONRequestBody) (interface{}, error) {
 	resp, err := processAIRequest(ctx, params, req)
 	if err != nil {
 		return nil, err
 	}
 
 	if req.Stream != nil && *req.Stream {
-		streamChan, ok := resp.(chan worker.LlmStreamChunk)
+		streamChan, ok := resp.(chan *worker.LLMResponse)
 		if !ok {
 			return nil, errors.New("unexpected response type for streaming request")
 		}
@@ -1122,20 +1122,12 @@ func processLLM(ctx context.Context, params aiRequestParams, req worker.GenLLMFo
 	return llmResp, nil
 }
 
-func submitLLM(ctx context.Context, params aiRequestParams, sess *AISession, req worker.GenLLMFormdataRequestBody) (interface{}, error) {
-	var buf bytes.Buffer
-	mw, err := worker.NewLLMMultipartWriter(&buf, req)
-	if err != nil {
-		if monitor.Enabled {
-			monitor.AIRequestError(err.Error(), "llm", *req.ModelId, nil)
-		}
-		return nil, err
-	}
+func submitLLM(ctx context.Context, params aiRequestParams, sess *AISession, req worker.GenLLMJSONRequestBody) (interface{}, error) {
 
 	client, err := worker.NewClientWithResponses(sess.Transcoder(), worker.WithHTTPClient(httpClient))
 	if err != nil {
 		if monitor.Enabled {
-			monitor.AIRequestError(err.Error(), "llm", *req.ModelId, sess.OrchestratorInfo)
+			monitor.AIRequestError(err.Error(), "llm", *req.Model, sess.OrchestratorInfo)
 		}
 		return nil, err
 	}
@@ -1148,17 +1140,17 @@ func submitLLM(ctx context.Context, params aiRequestParams, sess *AISession, req
 	setHeaders, balUpdate, err := prepareAIPayment(ctx, sess, int64(*req.MaxTokens))
 	if err != nil {
 		if monitor.Enabled {
-			monitor.AIRequestError(err.Error(), "llm", *req.ModelId, sess.OrchestratorInfo)
+			monitor.AIRequestError(err.Error(), "llm", *req.Model, sess.OrchestratorInfo)
 		}
 		return nil, err
 	}
 	defer completeBalanceUpdate(sess.BroadcastSession, balUpdate)
 
 	start := time.Now()
-	resp, err := client.GenLLMWithBody(ctx, mw.FormDataContentType(), &buf, setHeaders)
+	resp, err := client.GenLLM(ctx, req, setHeaders)
 	if err != nil {
 		if monitor.Enabled {
-			monitor.AIRequestError(err.Error(), "llm", *req.ModelId, sess.OrchestratorInfo)
+			monitor.AIRequestError(err.Error(), "llm", *req.Model, sess.OrchestratorInfo)
 		}
 		return nil, err
 	}
@@ -1168,83 +1160,90 @@ func submitLLM(ctx context.Context, params aiRequestParams, sess *AISession, req
 		return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
 	}
 
+	// We treat a response as "receiving change" where the change is the difference between the credit and debit for the update
+	// TODO: move to after receive stream response in handleSSEStream and handleNonStreamingResponse to count input tokens
+	if balUpdate != nil {
+		balUpdate.Status = ReceivedChange
+	}
+
 	if req.Stream != nil && *req.Stream {
 		return handleSSEStream(ctx, resp.Body, sess, req, start)
 	}
 
 	return handleNonStreamingResponse(ctx, resp.Body, sess, req, start)
 }
 
-func handleSSEStream(ctx context.Context, body io.ReadCloser, sess *AISession, req worker.GenLLMFormdataRequestBody, start time.Time) (chan worker.LlmStreamChunk, error) {
-	streamChan := make(chan worker.LlmStreamChunk, 100)
+func handleSSEStream(ctx context.Context, body io.ReadCloser, sess *AISession, req worker.GenLLMJSONRequestBody, start time.Time) (chan *worker.LLMResponse, error) {
+	streamChan := make(chan *worker.LLMResponse, 100)
 	go func() {
 		defer close(streamChan)
 		defer body.Close()
 		scanner := bufio.NewScanner(body)
-		var totalTokens int
+		var totalTokens worker.LLMTokenUsage
 		for scanner.Scan() {
 			line := scanner.Text()
 			if strings.HasPrefix(line, "data: ") {
 				data := strings.TrimPrefix(line, "data: ")
-				if data == "[DONE]" {
-					streamChan <- worker.LlmStreamChunk{Done: true, TokensUsed: totalTokens}
-					break
-				}
-				var chunk worker.LlmStreamChunk
+
+				var chunk worker.LLMResponse
 				if err := json.Unmarshal([]byte(data), &chunk); err != nil {
 					clog.Errorf(ctx, "Error unmarshaling SSE data: %v", err)
 					continue
 				}
-				totalTokens += chunk.TokensUsed
-				streamChan <- chunk
+				totalTokens = chunk.TokensUsed
+				streamChan <- &chunk
+				//check if stream is finished
+				if chunk.Choices[0].FinishReason != nil && *chunk.Choices[0].FinishReason != "" {
+					break
+				}
 			}
 		}
 		if err := scanner.Err(); err != nil {
 			clog.Errorf(ctx, "Error reading SSE stream: %v", err)
 		}
 
 		took := time.Since(start)
-		sess.LatencyScore = CalculateLLMLatencyScore(took, totalTokens)
+		sess.LatencyScore = CalculateLLMLatencyScore(took, totalTokens.TotalTokens)
 
 		if monitor.Enabled {
 			var pricePerAIUnit float64
 			if priceInfo := sess.OrchestratorInfo.GetPriceInfo(); priceInfo != nil && priceInfo.PixelsPerUnit != 0 {
 				pricePerAIUnit = float64(priceInfo.PricePerUnit) / float64(priceInfo.PixelsPerUnit)
 			}
-			monitor.AIRequestFinished(ctx, "llm", *req.ModelId, monitor.AIJobInfo{LatencyScore: sess.LatencyScore, PricePerUnit: pricePerAIUnit}, sess.OrchestratorInfo)
+			monitor.AIRequestFinished(ctx, "llm", *req.Model, monitor.AIJobInfo{LatencyScore: sess.LatencyScore, PricePerUnit: pricePerAIUnit}, sess.OrchestratorInfo)
 		}
 	}()
 
 	return streamChan, nil
 }
 
-func handleNonStreamingResponse(ctx context.Context, body io.ReadCloser, sess *AISession, req worker.GenLLMFormdataRequestBody, start time.Time) (*worker.LLMResponse, error) {
+func handleNonStreamingResponse(ctx context.Context, body io.ReadCloser, sess *AISession, req worker.GenLLMJSONRequestBody, start time.Time) (*worker.LLMResponse, error) {
 	data, err := io.ReadAll(body)
 	defer body.Close()
 	if err != nil {
 		if monitor.Enabled {
-			monitor.AIRequestError(err.Error(), "llm", *req.ModelId, sess.OrchestratorInfo)
+			monitor.AIRequestError(err.Error(), "llm", *req.Model, sess.OrchestratorInfo)
 		}
 		return nil, err
 	}
 
 	var res worker.LLMResponse
 	if err := json.Unmarshal(data, &res); err != nil {
 		if monitor.Enabled {
-			monitor.AIRequestError(err.Error(), "llm", *req.ModelId, sess.OrchestratorInfo)
+			monitor.AIRequestError(err.Error(), "llm", *req.Model, sess.OrchestratorInfo)
 		}
 		return nil, err
 	}
 
 	took := time.Since(start)
-	sess.LatencyScore = CalculateLLMLatencyScore(took, res.TokensUsed)
+	sess.LatencyScore = CalculateLLMLatencyScore(took, res.TokensUsed.TotalTokens)
 
 	if monitor.Enabled {
 		var pricePerAIUnit float64
 		if priceInfo := sess.OrchestratorInfo.GetPriceInfo(); priceInfo != nil && priceInfo.PixelsPerUnit != 0 {
 			pricePerAIUnit = float64(priceInfo.PricePerUnit) / float64(priceInfo.PixelsPerUnit)
 		}
-		monitor.AIRequestFinished(ctx, "llm", *req.ModelId, monitor.AIJobInfo{LatencyScore: sess.LatencyScore, PricePerUnit: pricePerAIUnit}, sess.OrchestratorInfo)
+		monitor.AIRequestFinished(ctx, "llm", *req.Model, monitor.AIJobInfo{LatencyScore: sess.LatencyScore, PricePerUnit: pricePerAIUnit}, sess.OrchestratorInfo)
 	}
 
 	return &res, nil
@@ -1403,16 +1402,16 @@ func processAIRequest(ctx context.Context, params aiRequestParams, req interface
 		submitFn = func(ctx context.Context, params aiRequestParams, sess *AISession) (interface{}, error) {
 			return submitAudioToText(ctx, params, sess, v)
 		}
-	case worker.GenLLMFormdataRequestBody:
+	case worker.GenLLMJSONRequestBody:
 		cap = core.Capability_LLM
 		modelID = defaultLLMModelID
-		if v.ModelId != nil {
-			modelID = *v.ModelId
+		if v.Model != nil {
+			modelID = *v.Model
 		}
 		submitFn = func(ctx context.Context, params aiRequestParams, sess *AISession) (interface{}, error) {
 			return submitLLM(ctx, params, sess, v)
 		}
-		ctx = clog.AddVal(ctx, "prompt", v.Prompt)
+
 	case worker.GenSegmentAnything2MultipartRequestBody:
 		cap = core.Capability_SegmentAnything2
 		modelID = defaultSegmentAnything2ModelID
diff --git a/server/ai_process_test.go b/server/ai_process_test.go
@@ -13,7 +13,7 @@ func Test_submitLLM(t *testing.T) {
 		ctx    context.Context
 		params aiRequestParams
 		sess   *AISession
-		req    worker.GenLLMFormdataRequestBody
+		req    worker.GenLLMJSONRequestBody
 	}
 	tests := []struct {
 		name    string
diff --git a/server/ai_worker.go b/server/ai_worker.go
diff --git a/server/ai_worker_test.go b/server/ai_worker_test.go
diff --git a/server/rpc.go b/server/rpc.go
diff --git a/server/rpc_test.go b/server/rpc_test.go

Original file line number	Diff line number	Diff line change
`@@ -690,8 +690,11 @@ func (a *stubAIWorker) SegmentAnything2(ctx context.Context, req worker.GenSegme`
`690`	`690`	`return &worker.MasksResponse{Logits: "logits", Masks: "masks", Scores: "scores"}, nil`
`691`	`691`	`}`
`692`	`692`
`693`		`-func (a *stubAIWorker) LLM(ctx context.Context, req worker.GenLLMFormdataRequestBody) (interface{}, error) {`
`694`		`- return &worker.LLMResponse{Response: "response tokens", TokensUsed: 10}, nil`
	`693`	`+func (a *stubAIWorker) LLM(ctx context.Context, req worker.GenLLMJSONRequestBody) (interface{}, error) {`
	`694`	`+ var choices []worker.LLMChoice`
	`695`	`+ choices = append(choices, worker.LLMChoice{Delta: &worker.LLMMessage{Content: "choice1", Role: "assistant"}, Index: 0})`
	`696`	`+ tokensUsed := worker.LLMTokenUsage{PromptTokens: 40, CompletionTokens: 10, TotalTokens: 50}`
	`697`	`+ return &worker.LLMResponse{Choices: choices, Created: 1, Model: "llm_model", TokensUsed: tokensUsed}, nil`
`695`	`698`	`}`
`696`	`699`
`697`	`700`	`func (a *stubAIWorker) ImageToText(ctx context.Context, req worker.GenImageToTextMultipartRequestBody) (*worker.ImageToTextResponse, error) {`
Original file line number	Diff line number	Diff line change
`@@ -824,14 +824,14 @@ func (orch *orchestrator) SegmentAnything2(ctx context.Context, requestID string`
`824`	`824`	`}`
`825`	`825`
`826`	`826`	`// Return type is LLMResponse, but a stream is available as well as chan(string)`
`827`		`-func (orch *orchestrator) LLM(ctx context.Context, requestID string, req worker.GenLLMFormdataRequestBody) (interface{}, error) {`
	`827`	`+func (orch *orchestrator) LLM(ctx context.Context, requestID string, req worker.GenLLMJSONRequestBody) (interface{}, error) {`
`828`	`828`	`// local AIWorker processes job if combined orchestrator/ai worker`
`829`	`829`	`if orch.node.AIWorker != nil {`
`830`	`830`	`// no file response to save, response is text sent back to gateway`
`831`	`831`	`return orch.node.AIWorker.LLM(ctx, req)`
`832`	`832`	`}`
`833`	`833`
`834`		`- res, err := orch.node.AIWorkerManager.Process(ctx, requestID, "llm", *req.ModelId, "", AIJobRequestData{Request: req})`
	`834`	`+ res, err := orch.node.AIWorkerManager.Process(ctx, requestID, "llm", *req.Model, "", AIJobRequestData{Request: req})`
`835`	`835`	`if err != nil {`
`836`	`836`	`return nil, err`
`837`	`837`	`}`
`@@ -842,7 +842,7 @@ func (orch *orchestrator) LLM(ctx context.Context, requestID string, req worker.`
`842`	`842`	`if err != nil {`
`843`	`843`	`clog.Errorf(ctx, "Error saving remote ai result err=%q", err)`
`844`	`844`	`if monitor.Enabled {`
`845`		`- monitor.AIResultSaveError(ctx, "llm", *req.ModelId, string(monitor.SegmentUploadErrorUnknown))`
	`845`	`+ monitor.AIResultSaveError(ctx, "llm", *req.Model, string(monitor.SegmentUploadErrorUnknown))`
`846`	`846`	`}`
`847`	`847`	`return nil, err`
`848`	`848`
`@@ -1087,7 +1087,7 @@ func (n *LivepeerNode) SegmentAnything2(ctx context.Context, req worker.GenSegme`
`1087`	`1087`	`return n.AIWorker.SegmentAnything2(ctx, req)`
`1088`	`1088`	`}`
`1089`	`1089`
`1090`		`-func (n *LivepeerNode) LLM(ctx context.Context, req worker.GenLLMFormdataRequestBody) (interface{}, error) {`
	`1090`	`+func (n *LivepeerNode) LLM(ctx context.Context, req worker.GenLLMJSONRequestBody) (interface{}, error) {`
`1091`	`1091`	`return n.AIWorker.LLM(ctx, req)`
`1092`	`1092`	`}`
`1093`	`1093`
Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ func Test_submitLLM(t *testing.T) {`
`13`	`13`	`ctx context.Context`
`14`	`14`	`params aiRequestParams`
`15`	`15`	`sess *AISession`
`16`		`- req worker.GenLLMFormdataRequestBody`
	`16`	`+ req worker.GenLLMJSONRequestBody`
`17`	`17`	`}`
`18`	`18`	`tests := []struct {`
`19`	`19`	`name string`