GH-1727: Fix HuggingFace text generation

jitokim · ilayaperumalg · commit 3c14fa633ad0 · 2024-11-14T21:15:50.000Z
- Update the GenerateResponse content schema type to array in openapi.json
- Use CompatGenerateRequest instead of GenerateRequest for the TextGenerationInference API request

Signed-off-by: jitokim <pigberger70@gmail.com>
diff --git a/models/spring-ai-huggingface/src/main/java/org/springframework/ai/huggingface/HuggingfaceChatModel.java b/models/spring-ai-huggingface/src/main/java/org/springframework/ai/huggingface/HuggingfaceChatModel.java
@@ -32,15 +32,16 @@
 import org.springframework.ai.huggingface.api.TextGenerationInferenceApi;
 import org.springframework.ai.huggingface.invoker.ApiClient;
 import org.springframework.ai.huggingface.model.AllOfGenerateResponseDetails;
+import org.springframework.ai.huggingface.model.CompatGenerateRequest;
 import org.springframework.ai.huggingface.model.GenerateParameters;
-import org.springframework.ai.huggingface.model.GenerateRequest;
 import org.springframework.ai.huggingface.model.GenerateResponse;
 
 /**
  * An implementation of {@link ChatModel} that interfaces with HuggingFace Inference
  * Endpoints for text generation.
  *
  * @author Mark Pollack
+ * @author Jihoon Kim
  */
 public class HuggingfaceChatModel implements ChatModel {
 
@@ -89,22 +90,24 @@ public HuggingfaceChatModel(final String apiToken, String basePath) {
 	 */
 	@Override
 	public ChatResponse call(Prompt prompt) {
-		GenerateRequest generateRequest = new GenerateRequest();
-		generateRequest.setInputs(prompt.getContents());
+		CompatGenerateRequest compatGenerateRequest = new CompatGenerateRequest();
+		compatGenerateRequest.setInputs(prompt.getContents());
 		GenerateParameters generateParameters = new GenerateParameters();
 		// TODO - need to expose API to set parameters per call.
 		generateParameters.setMaxNewTokens(this.maxNewTokens);
-		generateRequest.setParameters(generateParameters);
-		GenerateResponse generateResponse = this.textGenApi.generate(generateRequest);
-		String generatedText = generateResponse.getGeneratedText();
+		compatGenerateRequest.setParameters(generateParameters);
+		List<GenerateResponse> generateResponses = this.textGenApi.compatGenerate(compatGenerateRequest);
 		List<Generation> generations = new ArrayList<>();
-		AllOfGenerateResponseDetails allOfGenerateResponseDetails = generateResponse.getDetails();
-		Map<String, Object> detailsMap = this.objectMapper.convertValue(allOfGenerateResponseDetails,
-				new TypeReference<Map<String, Object>>() {
-
-				});
-		Generation generation = new Generation(generatedText, detailsMap);
-		generations.add(generation);
+		for (GenerateResponse generateResponse : generateResponses) {
+			String generatedText = generateResponse.getGeneratedText();
+			AllOfGenerateResponseDetails allOfGenerateResponseDetails = generateResponse.getDetails();
+			Map<String, Object> detailsMap = this.objectMapper.convertValue(allOfGenerateResponseDetails,
+					new TypeReference<Map<String, Object>>() {
+
+					});
+			Generation generation = new Generation(generatedText, detailsMap);
+			generations.add(generation);
+		}
 		return new ChatResponse(generations);
 	}
 
diff --git a/models/spring-ai-huggingface/src/main/resources/openapi.json b/models/spring-ai-huggingface/src/main/resources/openapi.json
@@ -37,7 +37,10 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/GenerateResponse"
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/GenerateResponse"
+                  }
                 }
               },
               "text/event-stream": {
diff --git a/models/spring-ai-huggingface/src/test/java/org/springframework/ai/huggingface/client/ClientIT.java b/models/spring-ai-huggingface/src/test/java/org/springframework/ai/huggingface/client/ClientIT.java
@@ -43,16 +43,20 @@ void helloWorldCompletion() {
 				lastname: Smith
 				address: #1 Samuel St.
 				Just generate the JSON object without explanations:
+				Your response should be in JSON format.
+				Do not include any explanations, only provide a RFC8259 compliant JSON response following this format without deviation.
+				Do not include markdown code blocks in your response.
+				Remove the ```json markdown from the output.
 				[/INST]
 				""";
 		Prompt prompt = new Prompt(mistral7bInstruct);
 		ChatResponse chatResponse = this.huggingfaceChatModel.call(prompt);
 		assertThat(chatResponse.getResult().getOutput().getContent()).isNotEmpty();
 		String expectedResponse = """
 				{
-					"name": "John",
-					"lastname": "Smith",
-					"address": "#1 Samuel St."
+				  "name": "John",
+				  "lastname": "Smith",
+				  "address": "#1 Samuel St."
 				}""";
 		assertThat(chatResponse.getResult().getOutput().getContent()).isEqualTo(expectedResponse);
 		assertThat(chatResponse.getResult().getOutput().getMetadata()).containsKey("generated_tokens");