-
Notifications
You must be signed in to change notification settings - Fork 8.8k
Open
Description
🔎 Search before asking
🐛 Bug (问题描述)
File "/PaddleOCR/ppstructure/layout/predict_layout_cus.py", line 147, in <module>
main(args)
File "PaddleOCR/ppstructure/layout/predict_layout_cus.py", line 132, in main
layout_res, elapse = layout_predictor(img)
^^^^^^^^^^^^^^^^^^^^^
File "PaddleOCR/ppstructure/layout/predict_layout_cus.py", line 96, in __call__
self.predictor.run()
ValueError: In user code:
InvalidArgumentError: The input of Op(Conv) should be a 4-D or 5-D Tensor. But received: input's dimension is -1, input's shape is [].
[Hint: Expected in_dims.size() == 4 || in_dims.size() == 5 == true, but received in_dims.size() == 4 || in_dims.size() == 5:0 != true:1.] (at /paddle/paddle/phi/kernels/cpu/conv_util.h:130)
[operator < pd_kernel.phi_kernel > error]
🏃♂️ Environment (运行环境)
paddleocr 3.2.0.dev61+gdfd8efa04.d20250915 ./PaddleOCR
paddlepaddle-gpu 3.2.0
paddlex 3.2.1
🌰 Minimal Reproducible Example (最小可复现问题的Demo)
# Standard-library modules needed for the search-path setup below.
import os
import sys
# Absolute path of the directory that contains this script.
__dir__ = os.path.dirname(os.path.abspath(__file__))
# Make modules that live next to this script importable ...
sys.path.append(__dir__)
# ... and put the directory two levels up at the front of the search path so
# the `tools`, `ppocr` and `ppstructure` imports below can resolve
# (presumably that directory is the repository root; confirm).
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../..")))
# Set before the remaining imports execute.
# NOTE(review): presumably a Paddle memory-allocator flag; confirm.
os.environ["FLAGS_allocator_strategy"] = "auto_growth"
import cv2
import numpy as np
import time
import tools.infer.utility as utility
from ppocr.data import create_operators, transform
from ppocr.postprocess import build_post_process
from ppocr.utils.logging import get_logger
from ppocr.utils.utility import get_image_file_list, check_and_read
from ppstructure.utility import parse_args
from picodet_postprocess import PicoDetPostProcess
# Module-level logger used by LayoutPredictor and main().
logger = get_logger()
class LayoutPredictor(object):
    """Run a layout-detection model on a single image.

    The instance owns a preprocessing pipeline, an inference predictor created
    by ``utility.create_predictor(args, "layout", logger)`` and a
    ``PicoDetPostProcess`` post-processor.
    """

    def __init__(self, args):
        """Build the pre/post-processing operators and the predictor.

        Args:
            args: Parsed options. This constructor reads
                ``layout_dict_path``, ``layout_score_threshold``,
                ``layout_nms_threshold`` and ``use_onnx``; the whole object is
                also forwarded to ``utility.create_predictor``.
        """
        pre_process_list = [
            {"Resize": {"size": [800, 608]}},
            {
                "NormalizeImage": {
                    "std": [0.229, 0.224, 0.225],
                    "mean": [0.485, 0.456, 0.406],
                    "scale": "1./255.",
                    "order": "hwc",
                }
            },
            {"ToCHWImage": None},
            {"KeepKeys": {"keep_keys": ["image"]}},
        ]
        postprocess_params = {
            "name": "PicoDetPostProcess",
            "layout_dict_path": args.layout_dict_path,
            "score_threshold": args.layout_score_threshold,
            "nms_threshold": args.layout_nms_threshold,
        }

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        (
            self.predictor,
            self.input_tensor,
            self.output_tensors,
            self.config,
        ) = utility.create_predictor(args, "layout", logger)
        self.use_onnx = args.use_onnx

    def _run_model(self, batch):
        """Feed ``batch`` to the predictor and collect its outputs.

        The outputs are split in half: the first half is returned as the
        score list and the second half as the box list.

        Returns:
            tuple: ``(score_list, boxes_list)``.
        """
        score_list, boxes_list = [], []
        if self.use_onnx:
            outputs = self.predictor.run(
                self.output_tensors, {self.input_tensor.name: batch}
            )
            half = len(outputs) // 2
            for idx in range(half):
                score_list.append(outputs[idx])
                boxes_list.append(outputs[idx + half])
            return score_list, boxes_list

        # NOTE(review): only this single input handle is fed. A model that
        # declares more than one input would have the others left unset --
        # verify the exported model's input names against this assumption.
        self.input_tensor.copy_from_cpu(batch)
        self.predictor.run()
        output_names = self.predictor.get_output_names()
        half = len(output_names) // 2
        for idx in range(half):
            score_list.append(
                self.predictor.get_output_handle(output_names[idx]).copy_to_cpu()
            )
            boxes_list.append(
                self.predictor.get_output_handle(
                    output_names[idx + half]
                ).copy_to_cpu()
            )
        return score_list, boxes_list

    def __call__(self, img):
        """Detect layout regions in ``img``.

        Args:
            img: Image array; it is copied before preprocessing so the
                original pixels can be handed to the post-processor.

        Returns:
            tuple: ``(post_preds, elapse)`` where ``elapse`` is the wall-clock
            time in seconds spent on inference and post-processing
            (preprocessing is not timed). ``(None, 0)`` is returned when
            preprocessing yields no image.
        """
        ori_im = img.copy()
        data = transform({"image": img}, self.preprocess_op)
        # Guard before indexing: if the pipeline itself yields None, the
        # original `data[0]` lookup would raise TypeError instead of taking
        # the intended early-return path.
        if data is None or data[0] is None:
            return None, 0

        # Add the batch axis, then copy so the predictor gets its own buffer.
        img = np.expand_dims(data[0], axis=0).copy()

        starttime = time.time()
        np_score_list, np_boxes_list = self._run_model(img)
        # Key names are passed exactly as before: the score list goes under
        # "boxes" and the box list under "boxes_num".
        preds = dict(boxes=np_score_list, boxes_num=np_boxes_list)
        post_preds = self.postprocess_op(ori_im, img, preds)
        elapse = time.time() - starttime
        return post_preds, elapse
def main(args):
    """Run layout prediction on every image found under ``args.image_dir``.

    Each file is read with ``check_and_read``; when that reports failure the
    file is read with ``cv2.imread`` instead. Unreadable files are logged and
    skipped. For every image the prediction result and its elapsed time are
    logged, and after the loop the average time is logged with the first
    processed image left out of the average.

    Args:
        args: Parsed options; ``image_dir`` is read here and the whole object
            is passed to ``LayoutPredictor``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    layout_predictor = LayoutPredictor(args)
    count = 0
    total_time = 0

    for image_file in image_file_list:
        img, flag, _ = check_and_read(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue

        layout_res, elapse = layout_predictor(img)
        logger.info("result: {}".format(layout_res))

        # The first processed image is excluded from the running total.
        if count > 0:
            total_time += elapse
        count += 1
        logger.info("Predict time of {}: {}".format(image_file, elapse))

    # Previously the accumulated total was never reported; surface it once at
    # least one image beyond the first has been timed.
    if count > 1:
        logger.info(
            "Average predict time (first image excluded): {}".format(
                total_time / (count - 1)
            )
        )
if __name__ == "__main__":
    args = parse_args()
    # Hard-coded overrides applied on top of the parsed command-line options
    # before the run starts.
    for option, value in (
        (
            "layout_dict_path",
            "/PaddleOCR/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt",
        ),
        ("image_dir", "PaddleOCR/ppstructure/pp_structure_v3_demo.png"),
        ("layout_model_dir", "/.paddlex/official_models/PP-DocLayout_plus-L"),
    ):
        setattr(args, option, value)
    main(args)
Metadata
Metadata
Assignees
Labels
No labels