unidoc · anovik · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/README.md b/README.md
@@ -7,6 +7,19 @@ a pull request.
 While the majority of examples are fully in pure Go, there are a few examples that demonstrate additional 
 functionality that requires CGO and external dependencies. Those examples are clarified by filename suffix "_cgo.go".
 
+## Disclaimer
+
+**IMPORTANT:** The code examples provided in this repository are for educational and demonstration purposes only. They are provided "as is" without warranty of any kind, either express or implied. These examples are intended to help developers understand how to use the UniPDF library and may not be suitable for production environments without additional security hardening, error handling, and testing.
+
+UniDoc (the maintainers of this repository) shall not be held responsible for any risks, damages, or issues arising from the use of these code examples in production or any other environment. Users are solely responsible for reviewing, testing, and adapting the code to meet their specific requirements and security standards before deploying to production systems.
+
+It is strongly recommended that you:
+- Conduct thorough security reviews and testing
+- Implement proper input validation and sanitization
+- Add appropriate error handling and logging
+- Follow security best practices for your specific use case
+- Consult with security professionals when handling sensitive data
+
 ## License codes
 UniPDF requires license codes to operate, there are two options:
 - Metered License API keys: Free ones can be obtained at https://cloud.unidoc.io

diff --git a/go.mod b/go.mod
@@ -5,10 +5,12 @@ go 1.23.0
 require (
 	cloud.google.com/go/kms v1.18.5
 	github.com/ThalesIgnite/crypto11 v1.2.5
+	github.com/anthonynsimon/bild v0.14.0
 	github.com/aws/aws-sdk-go v1.55.6
 	github.com/bmatcuk/doublestar v1.3.4
 	github.com/boombuler/barcode v1.0.2
 	github.com/gabriel-vasile/mimetype v1.4.8
+	github.com/stefanhengl/gohocr v0.0.0-20171024154250-dde96807b100
 	github.com/trimmer-io/go-xmp v1.0.0
 	github.com/unidoc/globalsign-dss v0.0.0-20220330092912-b69d85b63736
 	github.com/unidoc/pkcs7 v0.3.0

diff --git a/go.sum b/go.sum
@@ -24,6 +24,8 @@ github.com/adrg/sysfont v0.1.2/go.mod h1:6d3l7/BSjX9VaeXWJt9fcrftFaD/t7l11xgSywC
 github.com/adrg/xdg v0.3.0/go.mod h1:7I2hH/IT30IsupOpKZ5ue7/qNi3CoKzD6tL3HwpaRMQ=
 github.com/adrg/xdg v0.5.3 h1:xRnxJXne7+oWDatRhR1JLnvuccuIeCoBu2rtuLqQB78=
 github.com/adrg/xdg v0.5.3/go.mod h1:nlTsY+NNiCBGCK2tpm09vRqfVzrc2fLmXGpBLF0zlTQ=
+github.com/anthonynsimon/bild v0.14.0 h1:IFRkmKdNdqmexXHfEU7rPlAmdUZ8BDZEGtGHDnGWync=
+github.com/anthonynsimon/bild v0.14.0/go.mod h1:hcvEAyBjTW69qkKJTfpcDQ83sSZHxwOunsseDfeQhUs=
 github.com/aws/aws-sdk-go v1.55.6 h1:cSg4pvZ3m8dgYcgqB97MrcdjUmZ1BeMYKUxMMB89IPk=
 github.com/aws/aws-sdk-go v1.55.6/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
 github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0=
@@ -109,6 +111,8 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
+github.com/stefanhengl/gohocr v0.0.0-20171024154250-dde96807b100 h1:hF/ZvwhZFjvAXLTKinLJZwFf7ajPZp+LUyDc+qtoVzM=
+github.com/stefanhengl/gohocr v0.0.0-20171024154250-dde96807b100/go.mod h1:cPiGn9y/mCPkH6dScOMVru1KnTdtzh/2DvvJrFDS7Sc=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=

diff --git a/ocr/README.md b/ocr/README.md
@@ -0,0 +1,16 @@
+# PDF OCR Examples
+
+UniPDF supports integration with HTTP-based OCR (Optical Character Recognition) services to extract text from images and scanned PDF documents. These examples demonstrate how to configure and use OCR services to process images and reconstruct searchable PDFs from scanned documents.
+
+The OCR functionality works by sending images to a configured HTTP endpoint that performs text recognition and returns the results in various formats including plain text and HOCR (HTML-based OCR format).
+
+## Examples
+
+- [hocr_sample.go](hocr_sample.go) illustrates how to process HOCR formatted OCR output, parsing word-level information including bounding boxes and confidence scores.
+- [ocr_batch.go](ocr_batch.go) shows how to perform batch OCR processing on multiple images concurrently, with error handling and summary reporting.
+- [ocr_sample.go](ocr_sample.go) demonstrates basic OCR usage by sending a single image to an HTTP OCR service and extracting the text content.
+- [reconstruct_pdf_from_hocr.go](reconstruct_pdf_from_hocr.go) demonstrates a complete workflow to extract images from a PDF, perform OCR with HOCR output, parse the structured results, and reconstruct a searchable PDF with properly positioned text.
+
+## Requirements
+
+These examples require an HTTP OCR service listening at `http://localhost:8080/file`. They were written against [unidoc/ocrserver](https://github.com/unidoc/ocrserver) as the OCR service. However, UniPDF's OCR API is designed to be flexible and should work with other OCR services that accept image uploads via multipart form data and return text or HOCR formatted results.
diff --git a/ocr/hocr_sample.go b/ocr/hocr_sample.go
@@ -0,0 +1,103 @@
+/**
+ * This is a sample Go program that demonstrates how to use the UniPDF library
+ * to perform OCR on an image using an HTTP OCR service that returns HOCR formatted
+ * output. The program parses the HOCR response and extracts word-level information
+ * including bounding boxes and confidence scores.
+ *
+ * This example uses https://github.com/unidoc/ocrserver as the OCR service.
+ * However, UniPDF's OCR API is designed to support other OCR services that accept
+ * image uploads via HTTP and return text or HOCR formatted results.
+ *
+ * Run as: go run hocr_sample.go input.jpg
+ */
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strconv"
+
+	"github.com/stefanhengl/gohocr"
+	"github.com/unidoc/unipdf/v4/common/license"
+	"github.com/unidoc/unipdf/v4/ocr"
+)
+
+// init loads the metered license API key from UNIDOC_LICENSE_API_KEY before the
+// library is used. Free keys can be created at https://cloud.unidoc.io
+func init() {
+	apiKey := os.Getenv(`UNIDOC_LICENSE_API_KEY`)
+	if err := license.SetMeteredKey(apiKey); err != nil {
+		panic(err)
+	}
+}
+
+// main uploads the image named on the command line to the OCR service, asks for
+// hOCR output, and prints every recognized word with its hOCR title attribute.
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Printf("Usage: go run hocr_sample.go input.jpg\n")
+		os.Exit(1)
+	}
+
+	f, err := os.Open(os.Args[1])
+	if err != nil {
+		fmt.Printf("Error opening file: %v\n", err)
+		os.Exit(1)
+	}
+	defer f.Close()
+
+	// Configure OCR service options; the "format" form field asks for hOCR output.
+	opts := ocr.OCROptions{
+		Url:           "http://localhost:8080/file",
+		Method:        "POST",
+		FileFieldName: "file",
+		Headers: map[string]string{
+			"Accept": "application/json",
+		},
+		FormFields: map[string]string{
+			"format": "hocr",
+		},
+		TimeoutSeconds: 30,
+	}
+
+	// Create OCR client.
+	client := ocr.NewHTTPOCRService(opts)
+	result, err := client.ExtractText(context.Background(), f, "image.jpg")
+	if err != nil {
+		fmt.Printf("Error extracting text: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Parse JSON response to extract the "result" field.
+	var jsonObj map[string]any
+	if err := json.Unmarshal(result, &jsonObj); err != nil {
+		fmt.Printf("Error parsing JSON response: %v\n", err)
+		os.Exit(1)
+	}
+
+	content, ok := jsonObj["result"].(string)
+	if !ok {
+		fmt.Printf("Error: result field is not a string\n")
+		os.Exit(1)
+	}
+	fmt.Printf("Extracted text: %s\n", content)
+
+	// The result is handled as a quoted string literal; unquote it to get raw hOCR.
+	content, err = strconv.Unquote(content)
+	if err != nil {
+		fmt.Printf("Error unquoting content: %v\n", err)
+		os.Exit(1)
+	}
+
+	data, err := gohocr.Parse([]byte(content))
+	if err != nil {
+		fmt.Printf("Error parsing HOCR data: %v\n", err)
+		os.Exit(1)
+	}
+
+	for _, v := range data.Words {
+		fmt.Printf("Word: %s, Title: %s\n", v.Content, v.Title) // title attribute is text, not a float
+	}
+}
diff --git a/ocr/ocr_batch.go b/ocr/ocr_batch.go
@@ -0,0 +1,98 @@
+/**
+ * This is a sample Go program that demonstrates how to use the UniPDF library
+ * to perform batch OCR processing on multiple images using an HTTP OCR service.
+ * The program processes multiple image files concurrently and displays the extracted
+ * text results along with a summary of successful and failed operations.
+ *
+ * This example uses https://github.com/unidoc/ocrserver as the OCR service.
+ * However, UniPDF's OCR API is designed to support other OCR services that accept
+ * image uploads via HTTP and return text or HOCR formatted results.
+ *
+ * Run as: go run ocr_batch.go image1.jpg image2.png [image3.jpg ...]
+ */
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/unidoc/unipdf/v4/common/license"
+	"github.com/unidoc/unipdf/v4/ocr"
+)
+
+// init registers the metered UniDoc license key, read from the environment
+// variable UNIDOC_LICENSE_API_KEY. Sign up at https://cloud.unidoc.io for a free key.
+func init() {
+	meteredKey := os.Getenv(`UNIDOC_LICENSE_API_KEY`)
+	if err := license.SetMeteredKey(meteredKey); err != nil {
+		panic(err)
+	}
+}
+
+// main OCRs every image path given on the command line through the HTTP OCR
+// service in one batch call, prints each file's extracted text or error, and
+// finishes with a success/failure tally.
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Printf("Usage: go run ocr_batch.go image1.jpg image2.png [image3.jpg ...]\n")
+		os.Exit(1)
+	}
+
+	// Every argument after the program name is an image path.
+	paths := os.Args[1:]
+
+	// Abort before contacting the service if any path is missing on disk.
+	for _, p := range paths {
+		_, statErr := os.Stat(p)
+		if os.IsNotExist(statErr) {
+			fmt.Printf("Error: File does not exist: %s\n", p)
+			os.Exit(1)
+		}
+	}
+
+	// Build the OCR client straight from its service options.
+	client := ocr.NewOCRHTTPClient(ocr.OCROptions{
+		Url:           "http://localhost:8080/file",
+		Method:        "POST",
+		FileFieldName: "file",
+		Headers: map[string]string{
+			"Accept": "application/json",
+		},
+		TimeoutSeconds: 30,
+	})
+
+	fmt.Printf("Processing %d files...\n", len(paths))
+
+	// The loops below assume results and errs line up index-for-index with
+	// paths; that is how the returned slices are consumed here.
+	results, errs := client.BatchProcessFiles(context.Background(), paths)
+
+	// Per-file report.
+	for idx := range paths {
+		name := filepath.Base(paths[idx])
+		fmt.Printf("\n--- Results for %s ---\n", name)
+
+		if errs[idx] == nil {
+			fmt.Printf("Extracted text from %s:\n%s\n", name, string(results[idx]))
+			continue
+		}
+		fmt.Printf("Error processing %s: %s\n", name, errs[idx])
+	}
+
+	// Tally the failures; every other entry in errs is nil and therefore
+	// counts as a success.
+	failed := 0
+	for _, e := range errs {
+		if e != nil {
+			failed++
+		}
+	}
+	succeeded := len(errs) - failed
+
+	fmt.Printf("\n--- Summary ---\n")
+	fmt.Printf("Successfully processed: %d files\n", succeeded)
+	fmt.Printf("Failed to process: %d files\n", failed)
+	fmt.Printf("Total files: %d\n", len(paths))
+}
diff --git a/ocr/ocr_sample.go b/ocr/ocr_sample.go
@@ -0,0 +1,66 @@
+/**
+ * This is a sample Go program that demonstrates how to use the UniPDF library
+ * to perform OCR on an image using an HTTP OCR service. The program sends an image
+ * to the configured OCR endpoint and displays the extracted text.
+ *
+ * This example uses https://github.com/unidoc/ocrserver as the OCR service.
+ * However, UniPDF's OCR API is designed to support other OCR services that accept
+ * image uploads via HTTP and return text or HOCR formatted results.
+ *
+ * Run as: go run ocr_sample.go input.jpg
+ */
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"github.com/unidoc/unipdf/v4/common/license"
+	"github.com/unidoc/unipdf/v4/ocr"
+)
+
+// init sets the metered license API key taken from UNIDOC_LICENSE_API_KEY and
+// panics if it is rejected. A free key is available at https://cloud.unidoc.io
+func init() {
+	licenseKey := os.Getenv(`UNIDOC_LICENSE_API_KEY`)
+	if err := license.SetMeteredKey(licenseKey); err != nil {
+		panic(err)
+	}
+}
+
+// main uploads the image named on the command line to the HTTP OCR service and
+// prints the result returned by ExtractText.
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Printf("Usage: go run ocr_sample.go input.jpg\n")
+		os.Exit(1)
+	}
+
+	img, err := os.Open(os.Args[1])
+	if err != nil {
+		fmt.Printf("Error opening file: %v\n", err)
+		os.Exit(1)
+	}
+	defer img.Close()
+
+	// Build the OCR service client straight from its options.
+	service := ocr.NewHTTPOCRService(ocr.OCROptions{
+		Url:           "http://localhost:8080/file",
+		Method:        "POST",
+		FileFieldName: "file",
+		Headers: map[string]string{
+			"Accept": "application/json",
+		},
+		TimeoutSeconds: 30,
+	})
+
+	// "image.jpg" is a fixed name passed for every input (presumably the upload filename).
+	text, err := service.ExtractText(context.Background(), img, "image.jpg")
+	if err != nil {
+		fmt.Printf("Error extracting text: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Extracted text: %s\n", string(text))
+}