diff --git a/CHANGELOG.md b/CHANGELOG.md
index 458d65b..ef3da6d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+# 2.1.0
+- Added Mistral OCR. See the README.md for more details.
+
# 2.0.0
- **BREAKING**: Major refactor of message handling for chat completions:
diff --git a/README.md b/README.md
index b950127..0558c64 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
**Mistral-java-client** is a Java client for the [Mistral AI](https://mistral.ai/) API. It allows you to easily interact
with the Mistral AI API from your Java application.
-Supports all chat completion and embedding models available in the API.
+Supports all chat completion, OCR, and embedding models available in the API.
New models or models not listed here may be already supported without any updates to the library.
@@ -15,6 +15,7 @@ Mistral-java-client is built against version 0.0.2 of the [Mistral AI API](https
- [Chat Completion](https://docs.mistral.ai/api/#tag/chat/operation/chat_completion_v1_chat_completions_post)
- [List Models](https://docs.mistral.ai/api/#tag/models/operation/list_models_v1_models_get)
- [Embeddings](https://docs.mistral.ai/api/#tag/embeddings/operation/embeddings_v1_embeddings_post)
+- [OCR](https://docs.mistral.ai/api/#tag/ocr)
# Requirements
@@ -33,7 +34,7 @@ repositories {
}
dependencies {
- implementation 'com.github.Dannyj1:mistral-java-client:2.0.0'
+ implementation 'com.github.Dannyj1:mistral-java-client:2.1.0'
}
```
@@ -51,7 +52,7 @@ dependencies {
com.github.Dannyj1
mistral-java-client
- 2.0.0
+ 2.1.0
```
@@ -397,11 +398,78 @@ Example output:
[-0.02015686, 0.04272461, 0.05529785, ... , -0.006855011, 0.009529114, -0.016448975]
```
+
+## OCR Completion
+
+This example shows how to use the Mistral AI API to perform OCR on a local PDF document by sending it as a Base64 data URI.
+
+```java
+import nl.dannyj.mistral.MistralClient;
+import nl.dannyj.mistral.models.completion.content.DocumentURLChunk;
+import nl.dannyj.mistral.models.ocr.OCRRequest;
+import nl.dannyj.mistral.models.ocr.OCRResponse;
+import nl.dannyj.mistral.models.ocr.OCRPageObject;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.nio.file.Files;
+import java.util.Base64;
+
+public class MinimalOcrExample {
+
+ public static void main(String[] args) throws IOException {
+ // Replace "C:\\path\\to\\file.pdf" with the actual path to your document
+ String filePath = "C:\\path\\to\\file.pdf";
+ File documentFile = new File(filePath);
+
+ // Replace "YOUR_API_KEY" with your actual Mistral AI API key
+ // Or set the MISTRAL_API_KEY environment variable
+ MistralClient client = new MistralClient("YOUR_API_KEY");
+
+ // Convert document to base64
+ byte[] documentBytes = Files.readAllBytes(documentFile.toPath());
+ String documentBase64 = Base64.getEncoder().encodeToString(documentBytes);
+ URI documentUrl = URI.create("data:application/pdf;base64," + documentBase64);
+
+ DocumentURLChunk documentChunk = DocumentURLChunk.builder()
+ .documentUrl(documentUrl)
+ .documentName("your_document.pdf") // Replace with your document name
+ .build();
+
+ OCRRequest request = OCRRequest.builder()
+ .model("mistral-ocr-latest") // Or another supported OCR model
+ .document(documentChunk)
+ .build();
+
+ try {
+ System.out.println("Performing OCR...");
+ OCRResponse response = client.performOcr(request);
+
+ System.out.println("OCR Results:");
+ if (response.getPages() != null && !response.getPages().isEmpty()) {
+ // Print markdown content of the first page
+ OCRPageObject firstPage = response.getPages().get(0);
+ System.out.println("--- Page " + firstPage.getIndex() + " ---");
+ System.out.println("Markdown Content:");
+ System.out.println(firstPage.getMarkdown());
+ } else {
+ System.out.println("No pages processed or results found.");
+ }
+
+ } catch (Exception e) {
+ System.err.println("An error occurred: " + e.getMessage());
+ e.printStackTrace();
+ }
+ }
+}
+```
+
# Roadmap
- [ ] Make multi-modal usage more convenient (through builders, etc.)
- [ ] Make JSON schemas for function calling more developer-friendly
-- [ ] Add support for all missing features (e.g. OCR)
+- [ ] Add support for all missing features (e.g. Codestral)
- [ ] Handle rate limits
- [ ] Unit tests
diff --git a/build.gradle b/build.gradle
index fbc2998..b6ee355 100644
--- a/build.gradle
+++ b/build.gradle
@@ -6,7 +6,7 @@ plugins {
}
group = "nl.dannyj"
-version = "2.0.0"
+version = "2.1.0"
repositories {
mavenCentral()
diff --git a/src/main/java/nl/dannyj/mistral/MistralClient.java b/src/main/java/nl/dannyj/mistral/MistralClient.java
index 1e130b0..51ecc5f 100644
--- a/src/main/java/nl/dannyj/mistral/MistralClient.java
+++ b/src/main/java/nl/dannyj/mistral/MistralClient.java
@@ -30,11 +30,14 @@
import nl.dannyj.mistral.models.embedding.EmbeddingRequest;
import nl.dannyj.mistral.models.embedding.EmbeddingResponse;
import nl.dannyj.mistral.models.model.ListModelsResponse;
+import nl.dannyj.mistral.models.ocr.OCRRequest;
+import nl.dannyj.mistral.models.ocr.OCRResponse;
import nl.dannyj.mistral.net.ChatCompletionChunkCallback;
import nl.dannyj.mistral.services.HttpService;
import nl.dannyj.mistral.services.MistralService;
import okhttp3.OkHttpClient;
+import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
@@ -72,7 +75,7 @@ public MistralClient(@NonNull String apiKey) {
* Default constructor that initializes the MistralClient with the API key from the environment variable "MISTRAL_API_KEY".
*/
public MistralClient() {
- this.apiKey = System.getenv(API_KEY_ENV_VAR);
+ this.apiKey = Objects.requireNonNull(System.getenv(API_KEY_ENV_VAR), "API key not found in environment variable " + API_KEY_ENV_VAR);
this.httpClient = buildHttpClient(120, 10, 10);
this.objectMapper = buildObjectMapper();
this.mistralService = buildMistralService();
@@ -134,7 +137,7 @@ public MistralClient(@NonNull String apiKey, int readTimeoutSeconds, int connect
}
/**
- * Default constructor that initializes the MistralClient with the API key from the environment variable "MISTRAL_API_KEY" and custom timeouts.
+ * Constructor that initializes the MistralClient with the API key from the environment variable "MISTRAL_API_KEY" and custom timeouts.
*
* @param readTimeoutSeconds The read timeout in seconds
* @param connectTimeoutSeconds The connect timeout in seconds
@@ -241,6 +244,32 @@ public CompletableFuture listModelsAsync() {
return mistralService.listModelsAsync();
}
+ /**
+ * Use the Mistral AI API to perform OCR on a document.
+ * This is a blocking method that delegates to the underlying Mistral service; see {@link #performOcrAsync(OCRRequest)} for the non-blocking variant.
+ *
+ * @param request The request to perform OCR. See {@link OCRRequest}. Can't be null.
+ * @return The response from the Mistral AI API containing the OCR results. See {@link OCRResponse}.
+ * @throws ConstraintViolationException if the request does not pass validation
+ * @throws UnexpectedResponseException if an unexpected response is received from the Mistral AI API
+ */
+ public OCRResponse performOcr(@NonNull OCRRequest request) {
+ return mistralService.performOcr(request);
+ }
+
+ /**
+ * Use the Mistral AI API to perform OCR on a document.
+ * This is a non-blocking/asynchronous method.
+ *
+ * @param request The request to perform OCR. See {@link OCRRequest}. Can't be null.
+ * @return A CompletableFuture that will complete with the OCR results from the Mistral AI API. See {@link OCRResponse}.
+ * Failures of the underlying blocking call (e.g. a ConstraintViolationException when the request does not pass validation, or an
+ * UnexpectedResponseException) are not thrown by this method; they complete the returned future exceptionally.
+ */
+ public CompletableFuture<OCRResponse> performOcrAsync(@NonNull OCRRequest request) {
+ return mistralService.performOcrAsync(request);
+ }
+
public void createChatCompletionStream(@NonNull ChatCompletionRequest request, @NonNull ChatCompletionChunkCallback callback) {
mistralService.createChatCompletionStream(request, callback);
}
diff --git a/src/main/java/nl/dannyj/mistral/models/completion/content/DocumentURLChunk.java b/src/main/java/nl/dannyj/mistral/models/completion/content/DocumentURLChunk.java
index d66d4b3..7107756 100644
--- a/src/main/java/nl/dannyj/mistral/models/completion/content/DocumentURLChunk.java
+++ b/src/main/java/nl/dannyj/mistral/models/completion/content/DocumentURLChunk.java
@@ -21,6 +21,7 @@
import jakarta.annotation.Nullable;
import jakarta.validation.constraints.NotNull;
import lombok.AllArgsConstructor;
+import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
@@ -31,6 +32,7 @@
*/
@NoArgsConstructor
@AllArgsConstructor
+@Builder
public class DocumentURLChunk implements ContentChunk {
/**
diff --git a/src/main/java/nl/dannyj/mistral/models/ocr/OCRImageObject.java b/src/main/java/nl/dannyj/mistral/models/ocr/OCRImageObject.java
new file mode 100644
index 0000000..04ed44f
--- /dev/null
+++ b/src/main/java/nl/dannyj/mistral/models/ocr/OCRImageObject.java
@@ -0,0 +1,77 @@
+package nl.dannyj.mistral.models.ocr;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import jakarta.annotation.Nullable;
+import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.PositiveOrZero;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Represents an image extracted from a page during OCR: its ID, bounding-box corners and optional Base64 content.
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class OCRImageObject {
+
+ /**
+ * Image ID for extracted image in a page.
+ *
+ * @return The image ID.
+ */
+ @NotNull
+ private String id;
+
+ /**
+ * X coordinate of top-left corner of the extracted image. Mapped to the JSON property {@code top_left_x}.
+ *
+ * @return The top-left X coordinate.
+ */
+ @NotNull
+ @PositiveOrZero
+ @JsonProperty("top_left_x")
+ private Integer topLeftX;
+
+ /**
+ * Y coordinate of top-left corner of the extracted image. Mapped to the JSON property {@code top_left_y}.
+ *
+ * @return The top-left Y coordinate.
+ */
+ @NotNull
+ @PositiveOrZero
+ @JsonProperty("top_left_y")
+ private Integer topLeftY;
+
+ /**
+ * X coordinate of bottom-right corner of the extracted image. Mapped to the JSON property {@code bottom_right_x}.
+ *
+ * @return The bottom-right X coordinate.
+ */
+ @NotNull
+ @PositiveOrZero
+ @JsonProperty("bottom_right_x")
+ private Integer bottomRightX;
+
+ /**
+ * Y coordinate of bottom-right corner of the extracted image. Mapped to the JSON property {@code bottom_right_y}.
+ *
+ * @return The bottom-right Y coordinate.
+ */
+ @NotNull
+ @PositiveOrZero
+ @JsonProperty("bottom_right_y")
+ private Integer bottomRightY;
+
+ /**
+ * Base64 string of the extracted image. Nullable; presumably only populated when requested via {@code include_image_base64} (TODO confirm).
+ *
+ * @return The Base64 image string, or null if it is absent.
+ */
+ @Nullable
+ @JsonProperty("image_base64")
+ private String imageBase64;
+}
diff --git a/src/main/java/nl/dannyj/mistral/models/ocr/OCRPageDimensions.java b/src/main/java/nl/dannyj/mistral/models/ocr/OCRPageDimensions.java
new file mode 100644
index 0000000..ba3e000
--- /dev/null
+++ b/src/main/java/nl/dannyj/mistral/models/ocr/OCRPageDimensions.java
@@ -0,0 +1,45 @@
+package nl.dannyj.mistral.models.ocr;
+
+import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.PositiveOrZero;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Represents the dimensions (DPI, height and width) of a PDF page's screenshot image.
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class OCRPageDimensions {
+
+ /**
+ * Dots per inch of the page image. Must be zero or positive.
+ *
+ * @return The DPI of the page image.
+ */
+ @NotNull
+ @PositiveOrZero
+ private Integer dpi;
+
+ /**
+ * Height of the image in pixels. Must be zero or positive.
+ *
+ * @return The height of the image.
+ */
+ @NotNull
+ @PositiveOrZero
+ private Integer height;
+
+ /**
+ * Width of the image in pixels. Must be zero or positive.
+ *
+ * @return The width of the image.
+ */
+ @NotNull
+ @PositiveOrZero
+ private Integer width;
+}
diff --git a/src/main/java/nl/dannyj/mistral/models/ocr/OCRPageObject.java b/src/main/java/nl/dannyj/mistral/models/ocr/OCRPageObject.java
new file mode 100644
index 0000000..bbd1ad6
--- /dev/null
+++ b/src/main/java/nl/dannyj/mistral/models/ocr/OCRPageObject.java
@@ -0,0 +1,53 @@
+package nl.dannyj.mistral.models.ocr;
+
+import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.PositiveOrZero;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import java.util.List;
+
+/**
+ * Represents the OCR information for a single page: its index, markdown text, extracted images and dimensions.
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class OCRPageObject {
+
+ /**
+ * The page index in a PDF document starting from 0.
+ *
+ * @return The page index.
+ */
+ @NotNull
+ @PositiveOrZero
+ private Integer index;
+
+ /**
+ * The markdown string response of the page.
+ *
+ * @return The markdown string response.
+ */
+ @NotNull
+ private String markdown;
+
+ /**
+ * List of all extracted images in the page. See {@link OCRImageObject}.
+ *
+ * @return The list of extracted images.
+ */
+ @NotNull
+ private List<OCRImageObject> images;
+
+ /**
+ * The dimensions of the PDF Page's screenshot image. See {@link OCRPageDimensions}.
+ *
+ * @return The dimensions of the page.
+ */
+ @NotNull
+ private OCRPageDimensions dimensions;
+}
diff --git a/src/main/java/nl/dannyj/mistral/models/ocr/OCRRequest.java b/src/main/java/nl/dannyj/mistral/models/ocr/OCRRequest.java
new file mode 100644
index 0000000..d987514
--- /dev/null
+++ b/src/main/java/nl/dannyj/mistral/models/ocr/OCRRequest.java
@@ -0,0 +1,92 @@
+package nl.dannyj.mistral.models.ocr;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import jakarta.validation.constraints.NotNull;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import nl.dannyj.mistral.models.Request;
+import nl.dannyj.mistral.models.completion.content.ContentChunk;
+import nl.dannyj.mistral.models.completion.content.DocumentURLChunk;
+import nl.dannyj.mistral.models.completion.content.ImageURLChunk;
+
+import java.util.List;
+
+/**
+ * Represents the request body for the OCR API endpoint (`/v1/ocr`).
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Builder
+public class OCRRequest implements Request {
+
+ /**
+ * ID of the model to use.
+ *
+ * @param model The model's ID. Can't be null.
+ * @return The model's ID.
+ */
+ @NotNull
+ private String model;
+
+ /**
+ * Optional ID for the request.
+ *
+ * @param id The optional ID for the request.
+ * @return The optional ID for the request.
+ */
+ private String id;
+
+ /**
+ * Document to run OCR on. Can be a DocumentURLChunk or an ImageURLChunk.
+ *
+ * @param document The document to run OCR on. Can be a {@link DocumentURLChunk} or an {@link ImageURLChunk}. Can't be null.
+ * @return The document to run OCR on.
+ */
+ @NotNull
+ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type", visible = false)
+ @JsonSubTypes({
+ @JsonSubTypes.Type(value = DocumentURLChunk.class, name = "document_url"),
+ @JsonSubTypes.Type(value = ImageURLChunk.class, name = "image_url")
+ })
+ private ContentChunk document;
+
+ /**
+ * Specific pages the user wants to process, given as a list of page indices. Starts from 0.
+ *
+ * @param pages A list of specific page indices to process. Starts from 0. Null to process all pages.
+ * @return A list of specific page indices to process.
+ */
+ private List<Integer> pages;
+
+ /**
+ * Include the Base64-encoded content of extracted images in the response (see {@link OCRImageObject#getImageBase64()}).
+ *
+ * @param includeImageBase64 Whether to include Base64 image content in the response. Null for default behavior.
+ * @return Whether to include Base64 image content in the response.
+ */
+ @JsonProperty("include_image_base64")
+ private Boolean includeImageBase64;
+
+ /**
+ * Maximum images to extract.
+ *
+ * @param imageLimit The maximum number of images to extract. Null for default behavior.
+ * @return The maximum number of images to extract.
+ */
+ @JsonProperty("image_limit")
+ private Integer imageLimit;
+
+ /**
+ * Minimum height and width of image to extract.
+ *
+ * @param imageMinSize The minimum height and width of images to extract. Null for default behavior.
+ * @return The minimum height and width of images to extract.
+ */
+ @JsonProperty("image_min_size")
+ private Integer imageMinSize;
+}
diff --git a/src/main/java/nl/dannyj/mistral/models/ocr/OCRResponse.java b/src/main/java/nl/dannyj/mistral/models/ocr/OCRResponse.java
new file mode 100644
index 0000000..296175f
--- /dev/null
+++ b/src/main/java/nl/dannyj/mistral/models/ocr/OCRResponse.java
@@ -0,0 +1,42 @@
+package nl.dannyj.mistral.models.ocr;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import nl.dannyj.mistral.models.Response;
+
+import java.util.List;
+
+/**
+ * Represents the response body from the OCR API endpoint (`/v1/ocr`).
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class OCRResponse implements Response {
+
+ /**
+ * List of OCR info for pages, one {@link OCRPageObject} per processed page.
+ *
+ * @return The list of OCR info for pages.
+ */
+ private List<OCRPageObject> pages;
+
+ /**
+ * The model used to generate the OCR.
+ *
+ * @return The model used to generate the OCR.
+ */
+ private String model;
+
+ /**
+ * Usage info for the OCR request. Mapped to the JSON property {@code usage_info}.
+ *
+ * @return The usage info for the OCR request.
+ */
+ @JsonProperty("usage_info")
+ private OCRUsageInfo usageInfo;
+}
diff --git a/src/main/java/nl/dannyj/mistral/models/ocr/OCRUsageInfo.java b/src/main/java/nl/dannyj/mistral/models/ocr/OCRUsageInfo.java
new file mode 100644
index 0000000..2df9c18
--- /dev/null
+++ b/src/main/java/nl/dannyj/mistral/models/ocr/OCRUsageInfo.java
@@ -0,0 +1,40 @@
+package nl.dannyj.mistral.models.ocr;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import jakarta.annotation.Nullable;
+import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.PositiveOrZero;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Represents the usage information (pages processed and document size) for an OCR request.
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class OCRUsageInfo {
+
+ /**
+ * Number of pages processed. Mapped to the JSON property {@code pages_processed}.
+ *
+ * @return The number of pages processed.
+ */
+ @NotNull
+ @PositiveOrZero
+ @JsonProperty("pages_processed")
+ private Integer pagesProcessed;
+
+ /**
+ * Document size in bytes. Mapped to the JSON property {@code doc_size_bytes}; may be null.
+ *
+ * @return The document size in bytes, or null if it is absent.
+ */
+ @Nullable
+ @PositiveOrZero
+ @JsonProperty("doc_size_bytes")
+ private Integer docSizeBytes;
+}
diff --git a/src/main/java/nl/dannyj/mistral/services/MistralService.java b/src/main/java/nl/dannyj/mistral/services/MistralService.java
index a28da8e..a2c91b3 100644
--- a/src/main/java/nl/dannyj/mistral/services/MistralService.java
+++ b/src/main/java/nl/dannyj/mistral/services/MistralService.java
@@ -37,6 +37,8 @@
import nl.dannyj.mistral.models.embedding.EmbeddingRequest;
import nl.dannyj.mistral.models.embedding.EmbeddingResponse;
import nl.dannyj.mistral.models.model.ListModelsResponse;
+import nl.dannyj.mistral.models.ocr.OCRRequest;
+import nl.dannyj.mistral.models.ocr.OCRResponse;
import nl.dannyj.mistral.net.ChatCompletionChunkCallback;
import okhttp3.Call;
import okhttp3.Callback;
@@ -60,7 +62,7 @@ public class MistralService {
/**
* Constructor that initializes the MistralService with a provided HttpService and ObjectMapper.
*
- * @param httpService The HttpService to be used for making HTTP requests to the Mistral AI API
+ * @param httpService The HttpService to be used for making HTTP requests to the Mistral AI API
* @param objectMapper The ObjectMapper to be used for converting objects to and from JSON
*/
public MistralService(@NonNull HttpService httpService, @NonNull ObjectMapper objectMapper) {
@@ -198,11 +200,40 @@ public EmbeddingResponse createEmbedding(@NonNull EmbeddingRequest request) {
*
* @param request The request to create an embedding. See {@link EmbeddingRequest}.
* @return A CompletableFuture that will complete with the generated embedding from the Mistral AI API. See {@link EmbeddingResponse}.
+ * Failures of the blocking {@link #createEmbedding(EmbeddingRequest)} call are not thrown by this method;
+ * they complete the returned future exceptionally.
*/
public CompletableFuture createEmbeddingAsync(@NonNull EmbeddingRequest request) {
return CompletableFuture.supplyAsync(() -> createEmbedding(request));
}
+ /**
+ * Use the Mistral AI API to perform OCR on a document.
+ * This is a blocking method: the request is validated first and then posted to the "/ocr" endpoint.
+ *
+ * @param request The request to perform OCR. See {@link OCRRequest}. Can't be null.
+ * @return The response from the Mistral AI API containing the OCR results. See {@link OCRResponse}.
+ * @throws ConstraintViolationException if the request does not pass validation
+ * @throws UnexpectedResponseException if an unexpected response is received from the Mistral AI API
+ */
+ public OCRResponse performOcr(@NonNull OCRRequest request) {
+ validateRequest(request);
+ return postRequest("/ocr", request, OCRResponse.class);
+ }
+
+ /**
+ * Use the Mistral AI API to perform OCR on a document.
+ * This is a non-blocking/asynchronous method that runs {@link #performOcr(OCRRequest)} via {@code CompletableFuture.supplyAsync}.
+ *
+ * @param request The request to perform OCR. See {@link OCRRequest}. Can't be null.
+ * @return A CompletableFuture that will complete with the OCR results from the Mistral AI API. See {@link OCRResponse}.
+ * Failures of the blocking call (e.g. a ConstraintViolationException when the request does not pass validation, or an
+ * UnexpectedResponseException) are not thrown by this method; they complete the returned future exceptionally.
+ */
+ public CompletableFuture<OCRResponse> performOcrAsync(@NonNull OCRRequest request) {
+ return CompletableFuture.supplyAsync(() -> performOcr(request));
+ }
+
/**
* This method is used to validate the request using the provided validator.
* If there are any constraint violations, it throws a ConstraintViolationException.