Setup issue on GPU and response time on CPU #16237

FinbizCode2025 · 2025-08-14T12:12:16Z

FinbizCode2025
Aug 14, 2025

First, I am facing an issue setting up PaddleOCR on my GPU (RTX 5070 Ti, 16 GB). I have CUDA version 12.9, which does not seem to be supported. I have tried installing both Paddle 3.1 and the nightly build, but I always get this error:
Fetching 6 files: 100%|██████████████████████████████████████████████████████| 6/6 [00:00<00:00, 12716.43it/s]
/mnt/data/office project/office project/padd/ppstructure/app.py:13: DeprecationWarning: Please use predict instead.
result = ocr.ocr(img_path, cls=False)
Traceback (most recent call last):
File "/mnt/data/office project/office project/padd/ppstructure/app.py", line 13, in
result = ocr.ocr(img_path, cls=False)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/luthradigital/anaconda3/envs/paddler_py311/lib/python3.11/site-packages/typing_extensions.py", line 2950, in wrapper
return arg(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^
File "/home/luthradigital/anaconda3/envs/paddler_py311/lib/python3.11/site-packages/paddleocr/_pipelines/ocr.py", line 225, in ocr
return self.predict(img, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: PaddleOCR.predict() got an unexpected keyword argument 'cls'
(paddler_py311) luthradigital@luthradigital-Z790-D-AX:/mnt/data/office project/office project/padd/ppstructure$

History restored

2nd issue on CPU;

All setup has been done and OCR is running, but it is taking a lot of time to respond. For example, I sent it a four-page PDF and it took 1-2 hours to recognize it.

this is my code:

from flask import Flask, request
from pathlib import Path
from paddleocr import PPStructureV3
from pdf2image import convert_from_path
import os
import uuid
import platform

# Flask uses the import name of this module to locate resources.
app = Flask(__name__)

# Folders
# Relative to the current working directory: uploaded PDFs, OCR results
# (JSON/Markdown), and the per-page PNGs rendered from each PDF.

UPLOAD_FOLDER = "uploads"
OUTPUT_FOLDER = "output"
IMAGES_FOLDER = "converted_images"

# exist_ok=True makes start-up idempotent when the folders already exist.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
os.makedirs(IMAGES_FOLDER, exist_ok=True)

# Poppler path (Windows & Linux)
# Directory holding the Poppler binaries; passed to convert_from_path().

if platform.system() == "Windows":
    # Change this if installed in a different location
    POPPLER_PATH = r"C:\poppler\Release-24.08.0-0\poppler-24.08.0\Library\bin"
else:
    # For Linux, ensure poppler-utils is installed (sudo apt install poppler-utils)
    POPPLER_PATH = "/usr/bin"  # Default poppler-utils path

# Initialize OCR pipeline
# Built once at import time and reused by every request. The document
# orientation-classification and unwarping stages are switched off.

print("Initializing PPStructureV3 OCR pipeline...")
pipeline = PPStructureV3(
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
)
print("OCR pipeline initialized successfully.")

# HTML UI (inline)
# Page template rendered with str.format(results=...). Literal CSS braces are
# doubled so that format() leaves them alone.
# NOTE(review): the <form>/<input>/<button> tags were missing from this copy of
# the script; they are reconstructed here from the CSS selectors, the visible
# labels, and the "pdf_file" field name the upload handler reads -- confirm
# against the original template.

HTML_FORM = """
<!DOCTYPE html>
<html>
<head>
<title>PDF to Structured Data</title>
<style>
body {{ font-family: Arial; margin: 20px; }}
form {{ background: #f9f9f9; padding: 20px; border-radius: 8px; }}
input[type=file] {{ margin-bottom: 15px; }}
button {{ background: #007BFF; color: white; border: none; padding: 10px 15px; border-radius: 4px; cursor: pointer; }}
pre {{ background: #f4f4f4; padding: 10px; border-radius: 6px; white-space: pre-wrap; word-wrap: break-word; }}
h3 {{ margin-top: 30px; }}
</style>
</head>
<body>
<h2>Extract Data from PDF</h2>
<form method="POST" enctype="multipart/form-data">
<label>Select PDF file:</label><br>
<input type="file" name="pdf_file"><br>
<button type="submit">Upload & Extract</button>
</form>
{results}
</body>
</html>
"""

@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the upload form and, on POST, OCR each page of the uploaded PDF.

    GET returns the empty form. POST stores the uploaded ``pdf_file`` under
    UPLOAD_FOLDER, converts it to page images with ``convert_from_path``,
    writes each page as a PNG in IMAGES_FOLDER, runs ``pipeline.predict`` on
    that PNG, saves the JSON and Markdown results in OUTPUT_FOLDER and embeds
    them in the returned page.

    Returns:
        The rendered HTML_FORM string. Failures (no file, PDF conversion
        error, per-page OCR error) are reported inside the page rather than
        raised; a failing page does not stop the remaining pages.
    """
    # Local import keeps this function self-contained. OCR output and
    # exception text are untrusted and must be escaped before being placed in
    # HTML, otherwise recognized "<"/"&" characters break the page or inject
    # markup.
    from html import escape

    if request.method != "POST":
        return HTML_FORM.format(results="")

    print("📄 PDF upload request received.")

    file = request.files.get("pdf_file")
    if not file:
        print("❌ No file uploaded.")
        return HTML_FORM.format(results="<p style='color:red;'>No file uploaded</p>")

    # Save uploaded PDF under a random id so uploads never collide.
    pdf_id = str(uuid.uuid4())
    pdf_path = os.path.join(UPLOAD_FOLDER, f"{pdf_id}.pdf")
    file.save(pdf_path)
    print(f"✅ PDF saved at: {pdf_path}")

    try:
        print("🔄 Converting PDF to images...")
        images = convert_from_path(pdf_path, poppler_path=POPPLER_PATH)
        print(f"✅ PDF converted into {len(images)} pages.")
    except Exception as e:  # request boundary: report the failure in the page
        print(f"❌ PDF conversion failed: {e}")
        return HTML_FORM.format(
            results=f"<p style='color:red;'>Error converting PDF: {escape(str(e))}</p>"
        )

    results_all = []
    for page_no, img in enumerate(images, start=1):
        image_filename = f"{pdf_id}_page_{page_no}.png"
        image_path = os.path.join(IMAGES_FOLDER, image_filename)
        img.save(image_path, "PNG")
        print(f"🖼 Saved page {page_no} as image: {image_path}")

        try:
            print(f"🔍 Running OCR on page {page_no}...")
            output = pipeline.predict(
                input=image_path,
                output_dir=OUTPUT_FOLDER,
                save_result=False,
                save_image=False,
            )

            for res in output:
                json_path = os.path.join(OUTPUT_FOLDER, f"{pdf_id}_page_{page_no}.json")
                md_path = os.path.join(OUTPUT_FOLDER, f"{pdf_id}_page_{page_no}.md")

                res.save_to_json(save_path=json_path)
                res.save_to_markdown(save_path=md_path)
                print(f"✅ OCR results saved: {json_path}, {md_path}")

                json_text = escape(Path(json_path).read_text(encoding="utf-8"))
                md_text = escape(Path(md_path).read_text(encoding="utf-8"))
                results_all.append(f"""
                    <h3>Page {page_no} - JSON Output</h3>
                    <pre>{json_text}</pre>
                    <h3>Page {page_no} - Markdown Output</h3>
                    <pre>{md_text}</pre>
                """)
        except Exception as e:  # keep going: one bad page should not lose the rest
            print(f"❌ OCR failed for page {page_no}: {e}")
            results_all.append(
                f"<p style='color:red;'>OCR failed for page {page_no}: {escape(str(e))}</p>"
            )

    print("🎯 Processing complete.")
    results_html = "".join(results_all)
    return HTML_FORM.format(results=results_html)

# Run the development server only when this file is executed directly.
if __name__ == "__main__":
    print("🚀 Starting Flask app...")
    # NOTE(review): debug=True turns on Flask's auto-reloader, which starts the
    # app in a second process; the module-level OCR pipeline is then presumably
    # initialized twice at start-up -- confirm and consider use_reloader=False.
    app.run(debug=True)

please give me some better solution how can i implement this ...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

setup issue on GPU and and response time on CPU #16237

Uh oh!

{{title}}

Uh oh!

Replies: 0 comments

Select a reply

Uh oh!

setup issue on GPU and and response time on CPU #16237

Uh oh!

FinbizCode2025 Aug 14, 2025

Folders

Poppler path (Windows & Linux)

Initialize OCR pipeline

HTML UI (inline)

Extract Data from PDF

Replies: 0 comments

FinbizCode2025
Aug 14, 2025