Skip to content

ppstructtureV3 predict_layout.py 预测出错 #16615

@cqray1990

Description

@cqray1990

🔎 Search before asking

🐛 Bug (问题描述)

/PaddleOCR/ppstructure/layout/predict_layout_cus.py", line 147, in
main(args)
File "PaddleOCR/ppstructure/layout/predict_layout_cus.py", line 132, in main
layout_res, elapse = layout_predictor(img)
^^^^^^^^^^^^^^^^^^^^^
File "PaddleOCR/ppstructure/layout/predict_layout_cus.py", line 96, in call
self.predictor.run()
ValueError: In user code:

InvalidArgumentError: The input of Op(Conv) should be a 4-D or 5-D Tensor. But received: input's dimension is -1, input's shape is [].
  [Hint: Expected in_dims.size() == 4 || in_dims.size() == 5 == true, but received in_dims.size() == 4 || in_dims.size() == 5:0 != true:1.] (at /paddle/paddle/phi/kernels/cpu/conv_util.h:130)
  [operator < pd_kernel.phi_kernel > error]

🏃‍♂️ Environment (运行环境)

paddleocr 3.2.0.dev61+gdfd8efa04.d20250915 ./PaddleOCR
paddlepaddle-gpu 3.2.0
paddlex 3.2.1

🌰 Minimal Reproducible Example (最小可复现问题的Demo)

      import os
      import sys
      
      __dir__ = os.path.dirname(os.path.abspath(__file__))
      sys.path.append(__dir__)
      sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../..")))
      
      os.environ["FLAGS_allocator_strategy"] = "auto_growth"
      
      import cv2
      import numpy as np
      import time
      
      import tools.infer.utility as utility
      from ppocr.data import create_operators, transform
      from ppocr.postprocess import build_post_process
      from ppocr.utils.logging import get_logger
      from ppocr.utils.utility import get_image_file_list, check_and_read
      from ppstructure.utility import parse_args
      from picodet_postprocess import PicoDetPostProcess
      
      logger = get_logger()
      
      
      class LayoutPredictor(object):
          def __init__(self, args):
              pre_process_list = [
                  {"Resize": {"size": [800, 608]}},
                  {
                      "NormalizeImage": {
                          "std": [0.229, 0.224, 0.225],
                          "mean": [0.485, 0.456, 0.406],
                          "scale": "1./255.",
                          "order": "hwc",
                      }
                  },
                  {"ToCHWImage": None},
                  {"KeepKeys": {"keep_keys": ["image"]}},
              ]
              postprocess_params = {
                  "name": "PicoDetPostProcess",
                  "layout_dict_path": args.layout_dict_path,
                  "score_threshold": args.layout_score_threshold,
                  "nms_threshold": args.layout_nms_threshold,
              }
      
              self.preprocess_op = create_operators(pre_process_list)
              self.postprocess_op = build_post_process(postprocess_params)
              (
                  self.predictor,
                  self.input_tensor,
                  self.output_tensors,
                  self.config,
              ) = utility.create_predictor(args, "layout", logger)
              self.use_onnx = args.use_onnx
      
          def __call__(self, img):
              ori_im = img.copy()
              data = {"image": img}
              data = transform(data, self.preprocess_op)
              img = data[0]
      
              if img is None:
                  return None, 0
      
              img = np.expand_dims(img, axis=0)
              img = img.copy()
      
              preds, elapse = 0, 1
              starttime = time.time()
      
              np_score_list, np_boxes_list = [], []
              if self.use_onnx:
                  input_dict = {}
                  input_dict[self.input_tensor.name] = img
                  outputs = self.predictor.run(self.output_tensors, input_dict)
                  num_outs = int(len(outputs) / 2)
                  for out_idx in range(num_outs):
                      np_score_list.append(outputs[out_idx])
                      np_boxes_list.append(outputs[out_idx + num_outs])
              else:
                  self.input_tensor.copy_from_cpu(img)
                  self.predictor.run()
                  output_names = self.predictor.get_output_names()
                  num_outs = int(len(output_names) / 2)
                  for out_idx in range(num_outs):
                      np_score_list.append(
                          self.predictor.get_output_handle(
                              output_names[out_idx]
                          ).copy_to_cpu()
                      )
                      np_boxes_list.append(
                          self.predictor.get_output_handle(
                              output_names[out_idx + num_outs]
                          ).copy_to_cpu()
                      )
              preds = dict(boxes=np_score_list, boxes_num=np_boxes_list)
      
              post_preds = self.postprocess_op(ori_im, img, preds)
              elapse = time.time() - starttime
              return post_preds, elapse
      
      
      def main(args):
          image_file_list = get_image_file_list(args.image_dir)
          layout_predictor = LayoutPredictor(args)
          count = 0
          total_time = 0
      
          repeats = 50
          for image_file in image_file_list:
              img, flag, _ = check_and_read(image_file)
              if not flag:
                  img = cv2.imread(image_file)
              if img is None:
                  logger.info("error in loading image:{}".format(image_file))
                  continue
      
              layout_res, elapse = layout_predictor(img)
      
              logger.info("result: {}".format(layout_res))
      
              if count > 0:
                  total_time += elapse
              count += 1
              logger.info("Predict time of {}: {}".format(image_file, elapse))
      
      
      if __name__ == "__main__":
          args = parse_args()
          args.layout_dict_path = "/PaddleOCR/ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt"
          args.image_dir = "PaddleOCR/ppstructure/pp_structure_v3_demo.png"
          args.layout_model_dir = "/.paddlex/official_models/PP-DocLayout_plus-L"
          main(args)

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions