Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions core/src/main/java/ai/z/openapi/AbstractAiClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import ai.z.openapi.service.image.ImageServiceImpl;
import ai.z.openapi.service.batches.BatchService;
import ai.z.openapi.service.batches.BatchServiceImpl;
import ai.z.openapi.service.ocr.HandwritingOcrService;
import ai.z.openapi.service.ocr.HandwritingOcrServiceImpl;
import ai.z.openapi.service.web_search.WebSearchService;
import ai.z.openapi.service.web_search.WebSearchServiceImpl;
import ai.z.openapi.service.web_reader.WebReaderService;
Expand Down Expand Up @@ -116,6 +118,9 @@ public abstract class AbstractAiClient extends AbstractClientBaseService {
/** FileParsing service for fileParsing operations */
private FileParsingService fileParsingService;

/** Handwriting OCR service for handwriting recognition operations */
private HandwritingOcrService handwritingOcrService;

/** Moderation service for content safety detection */
private ModerationService moderationService;

Expand Down Expand Up @@ -295,6 +300,13 @@ public synchronized FileParsingService fileParsing() {
return fileParsingService;
}

/**
 * Returns the handwriting OCR service, creating it on first access.
 * @return the HandwritingOcrService instance
 */
public synchronized HandwritingOcrService handwriting() {
    HandwritingOcrService service = this.handwritingOcrService;
    if (service == null) {
        // First access: build the implementation and remember it for later calls.
        service = new HandwritingOcrServiceImpl(this);
        this.handwritingOcrService = service;
    }
    return service;
}

/**
* Returns the moderation service for content safety detection. This service handles
* content moderation for text, image, video, and audio inputs.
Expand Down
24 changes: 24 additions & 0 deletions core/src/main/java/ai/z/openapi/api/ocr/HandwritingOcrApi.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ai.z.openapi.api.ocr;

import ai.z.openapi.service.ocr.HandwritingOcrResult;
import okhttp3.MultipartBody;
import retrofit2.Call;
import retrofit2.http.Body;
import retrofit2.http.POST;

/**
* Retrofit API definition for OCR handwriting recognition. Declares the endpoint used
* to upload an image for handwriting recognition and receive the parsed result.
*/
public interface HandwritingOcrApi {

/**
* Creates a call that POSTs the given multipart body to {@code files/ocr}. The
* request is only sent when the returned {@link Call} is executed or enqueued.
* @param multipartBody The multipart request body containing the image file and
* metadata.
* @return A call whose response body is a {@link HandwritingOcrResult}.
*/
@POST("files/ocr")
Call<HandwritingOcrResult> recognize(@Body MultipartBody multipartBody);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package ai.z.openapi.service.ocr;

import ai.z.openapi.core.model.ClientResponse;
import ai.z.openapi.service.model.ChatError;
import lombok.Data;

/**
* Client-facing response wrapper for a handwriting OCR request. Carries a status code,
* a message, a success flag, the {@link HandwritingOcrResult} payload and error
* details. Accessors are generated by Lombok's {@code @Data}.
*/
@Data
public class HandwritingOcrResponse implements ClientResponse<HandwritingOcrResult> {

/**
* Response status code.
*/
private int code;

/**
* Response message.
*/
private String msg;

/**
* Indicates whether the request was successful.
*/
private boolean success;

/**
* The HandwritingOcr result data.
*/
private HandwritingOcrResult data;

/**
* Error information if the request failed.
*/
private ChatError error;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ai.z.openapi.service.ocr;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.List;

/**
* Result payload of a handwriting OCR request.
* <p>
* NOTE(review): field names are snake_case, presumably so they map directly onto the
* JSON keys returned by the service - confirm before renaming, because the
* Lombok-generated accessor names and the all-args constructor order are derived
* from these declarations.
*/
@Data
@AllArgsConstructor
@NoArgsConstructor
public class HandwritingOcrResult {

private String task_id; // Task ID or result ID

private String message; // Response message

private String status; // OCR task status (e.g., "succeeded")

private int words_result_num; // Number of recognition results

private List<WordsResult> words_result; // List of recognition results

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package ai.z.openapi.service.ocr;

/**
* Service contract for handwriting OCR: submits an image for recognition and returns
* the wrapped result.
*/
public interface HandwritingOcrService {

/**
* Executes a synchronous handwriting recognition operation.
* @param request The OCR upload request (contains file path, tool type, language
* type and the optional probability flag)
* @return A {@link HandwritingOcrResponse} containing the recognition result
*/
HandwritingOcrResponse recognize(HandwritingOcrUploadReq request);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package ai.z.openapi.service.ocr;

import ai.z.openapi.AbstractAiClient;
import ai.z.openapi.api.ocr.HandwritingOcrApi;
import ai.z.openapi.utils.RequestSupplier;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.reactivex.rxjava3.core.Single;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.RequestBody;
import retrofit2.Response;

import java.io.File;
import java.io.IOException;

/**
 * OCR handwriting recognition service implementation.
 * <p>
 * Uploads a local image file as {@code multipart/form-data} through
 * {@link HandwritingOcrApi} and hands the outcome to the owning client's
 * {@code executeRequest} to be wrapped in a {@link HandwritingOcrResponse}.
 */
public class HandwritingOcrServiceImpl implements HandwritingOcrService {

    /**
     * Shared mapper used only to decode error bodies. Kept as a constant so a new
     * mapper is not allocated on every failed request.
     */
    private static final ObjectMapper ERROR_MAPPER = new ObjectMapper();

    private final AbstractAiClient zAiClient;

    private final HandwritingOcrApi handwritingOcrApi;

    /**
     * Creates the service and its Retrofit API proxy from the given client.
     * @param zAiClient the client providing the Retrofit instance and request execution
     */
    public HandwritingOcrServiceImpl(AbstractAiClient zAiClient) {
        this.zAiClient = zAiClient;
        this.handwritingOcrApi = zAiClient.retrofit().create(HandwritingOcrApi.class);
    }

    /**
     * Uploads the image referenced by the request and returns the recognition result.
     * @param request the upload request; {@code filePath} and {@code toolType} are
     * required, {@code languageType} and {@code probability} are optional
     * @return the response produced by the client's {@code executeRequest}
     * @throws IllegalArgumentException if the request, its file path or its tool type
     * is {@code null}
     */
    @Override
    public HandwritingOcrResponse recognize(HandwritingOcrUploadReq request) {
        if (request == null) {
            throw new IllegalArgumentException("request cannot be null");
        }
        if (request.getFilePath() == null) {
            throw new IllegalArgumentException("filePath cannot be null");
        }
        if (request.getToolType() == null) {
            throw new IllegalArgumentException("toolType cannot be null");
        }

        RequestSupplier<HandwritingOcrUploadReq, HandwritingOcrResult> supplier = params -> {
            try {
                File file = new File(params.getFilePath());
                if (!file.exists()) {
                    throw new RuntimeException("file not found at " + params.getFilePath());
                }

                // Send the POST request and block until the response arrives.
                Response<HandwritingOcrResult> response = handwritingOcrApi
                    .recognize(buildMultipartBody(file, params))
                    .execute();
                HandwritingOcrResult result = response.body();
                if (!response.isSuccessful() || result == null) {
                    throw new IOException(describeFailure(response));
                }
                return Single.just(result);
            }
            catch (RuntimeException e) {
                // Already unchecked: rethrow as-is instead of nesting it in another
                // RuntimeException, so the original message stays at the top level.
                throw e;
            }
            catch (Exception e) {
                throw new RuntimeException(e);
            }
        };

        return this.zAiClient.executeRequest(request, supplier, HandwritingOcrResponse.class);
    }

    /**
     * Builds the multipart/form-data body: the file part, the mandatory
     * {@code tool_type} field, and the optional {@code language_type} and
     * {@code probability} fields when they are set.
     */
    private static MultipartBody buildMultipartBody(File file, HandwritingOcrUploadReq params) {
        MultipartBody.Part filePart = MultipartBody.Part.createFormData("file", file.getName(),
                RequestBody.create(MediaType.parse("application/octet-stream"), file));
        MultipartBody.Builder formBodyBuilder = new MultipartBody.Builder().setType(MultipartBody.FORM);
        formBodyBuilder.addPart(filePart);
        formBodyBuilder.addFormDataPart("tool_type", params.getToolType());
        if (params.getLanguageType() != null) {
            formBodyBuilder.addFormDataPart("language_type", params.getLanguageType());
        }
        if (params.getProbability() != null) {
            formBodyBuilder.addFormDataPart("probability", String.valueOf(params.getProbability()));
        }
        return formBodyBuilder.build();
    }

    /**
     * Builds the failure message from the HTTP status code, the status message and,
     * when available, a detail extracted from the error body.
     */
    private static String describeFailure(Response<HandwritingOcrResult> response) throws IOException {
        String errorJson = "";
        if (response.errorBody() != null) {
            errorJson = response.errorBody().string();
        }
        String detail = extractErrorDetail(errorJson);
        return "Failed to recognize, code: " + response.code() + ", msg: " + response.message()
                + (detail.isEmpty() ? "" : (", detail: " + detail));
    }

    /**
     * Extracts a detail string from an error body. Returns the {@code message} field
     * when the body is a JSON object that maps onto {@link HandwritingOcrResult}, the
     * raw body when it cannot be mapped or has no {@code message}, and an empty string
     * when the body does not look like a JSON object. Never returns {@code null}.
     */
    private static String extractErrorDetail(String errorJson) {
        if (!errorJson.trim().startsWith("{")) {
            return "";
        }
        try {
            String message = ERROR_MAPPER.readValue(errorJson, HandwritingOcrResult.class).getMessage();
            // A JSON object without a "message" field yields null here; fall back to
            // the raw body so the caller never dereferences null.
            return message != null ? message : errorJson;
        }
        catch (Exception ignored) {
            // Body is not in the expected shape: report it verbatim.
            return errorJson;
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ai.z.openapi.service.ocr;

import ai.z.openapi.core.model.ClientRequest;
import lombok.Data;

/**
* Handwriting OCR upload request object. Carries the path of the image file to
* upload together with the options submitted alongside it.
*/
@Data
public class HandwritingOcrUploadReq implements ClientRequest<HandwritingOcrUploadReq> {

private String filePath; // Path to the image file

private String toolType; // Tool type, must be "hand_write"

private String languageType; // Language type (optional)

private Boolean probability; // Optional flag; presumably asks the service to return per-line confidence scores - TODO confirm

}
20 changes: 20 additions & 0 deletions core/src/main/java/ai/z/openapi/service/ocr/Location.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ai.z.openapi.service.ocr;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
* Rectangle described by its left/top offsets and its width/height. Used by
* {@link WordsResult} as the location of detected text.
* <p>
* NOTE(review): units and origin are not visible here - presumably pixels measured
* from the top-left corner of the source image; confirm against the API docs.
*/
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Location {

private int left;

private int top;

private int width;

private int height;

}
18 changes: 18 additions & 0 deletions core/src/main/java/ai/z/openapi/service/ocr/Probability.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ai.z.openapi.service.ocr;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
* Confidence statistics for one recognized line of text: average, variance and
* minimum. Values are boxed, so any of them may be {@code null}.
*/
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Probability {

private Double average; // Average confidence of the line

private Double variance; // Confidence variance of the line

private Double min; // Minimum confidence of the line

}
18 changes: 18 additions & 0 deletions core/src/main/java/ai/z/openapi/service/ocr/WordsResult.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ai.z.openapi.service.ocr;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
* A single handwriting recognition result: the recognized text, where it was detected,
* and its confidence statistics.
* <p>
* NOTE(review): {@code probability} is presumably populated only when the request asked
* for confidence scores - confirm against the API docs.
*/
@Data
@AllArgsConstructor
@NoArgsConstructor
public class WordsResult {

private Location location; // Location information for detected text

private String words; // Recognized text

private Probability probability; // Confidence score for each line of text recognition

}
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
</scm>

<properties>
<revision>0.1.0</revision>
<revision>0.1.1</revision>
<java.version>8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
Expand Down
Loading