AttributeError: Execute PaddleOCR / PPStructure(**params)(ImageArray) #12998

xxleonn · 2023-11-16T04:23:24Z

xxleonn
Nov 16, 2023

请提供下述完整信息以便快速定位问题/Please provide the following information to quickly locate the problem

系统环境/System Environment：Windows10, CPU
版本号/Version：Paddle：2.5.2 PaddleOCR： 2.6.0.1
问题相关组件/Related components：PaddleOCR
运行指令/Command Code：

from img2table.document import Image
from img2table.ocr import PaddleOCR

ocr = PaddleOCR(lang="en",
kw={
'ocr_version': 'PP-OCRv3',
'structure_version': 'PP-StructureV2',
'det_model_dir': 'C:/xxxx/PaddleOCR/ch_PP-OCRv3_det_infer',
'rec_model_dir': 'C:/xxxx/PaddleOCR/en_PP-OCRv3_rec_infer',
'cls_model_dir': 'C:/xxxx/PaddleOCR/ch_ppocr_mobile_v2.0_cls_infer',
'table_model_dir': 'C:/xxxx/PaddleOCR/en_ppstructure_mobile_v2.0_SLANet_infer',
'layout_model_dir': 'C:/xxxx/PaddleOCR/picodet_lcnet_x1_0_fgd_layout_infer',
'lang': 'en',
})

doc = Image(f"image.png")

extracted_tables = doc.extract_tables(ocr = ocr, implicit_rows = False, borderless_tables = False, min_confidence = 0)

完整报错/Complete Error Message：

AttributeError Traceback (most recent call last)
Cell In[11], line 16
13 doc = Img2TableImage(r"C:/xxxxx/Image.png")
15 # Table extraction
---> 16 extracted_tables = doc.extract_tables(ocr = ocr, implicit_rows = False, borderless_tables = False, min_confidence = 0)
17 doc.to_xlsx('tables1.xlsx', ocr = ocr, implicit_rows = False, borderless_tables = False, min_confidence = 0)

File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\image.py:42, in Image.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
32 def extract_tables(self, ocr: "OCRInstance" = None, implicit_rows: bool = False, borderless_tables: bool = False,
33 min_confidence: int = 50) -> List[ExtractedTable]:
34 """
35 Extract tables from document
36 :param ocr: OCRInstance object used to extract table content
(...)
40 :return: list of extracted tables
41 """
---> 42 extracted_tables = super(Image, self).extract_tables(ocr=ocr,
43 implicit_rows=implicit_rows,
44 borderless_tables=borderless_tables,
45 min_confidence=min_confidence)
46 return extracted_tables.get(0)

File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\base\__init__.py:126, in Document.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
120 tables = {idx: TableImage(img=img,
121 min_confidence=min_confidence).extract_tables(implicit_rows=implicit_rows,
122 borderless_tables=borderless_tables)
123 for idx, img in enumerate(self.images)}
125 # Update table content with OCR if possible
--> 126 tables = self.get_table_content(tables=tables,
127 ocr=ocr,
128 min_confidence=min_confidence)
130 # If pages have been defined, modify tables keys
131 if self.pages:

File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\base\__init__.py:85, in Document.get_table_content(self, tables, ocr, min_confidence)
83 # Get OCRDataFrame object
84 if self.ocr_df is None and ocr is not None:
---> 85 self.ocr_df = ocr.of(document=ocr_doc)
87 # Retrieve table contents with ocr
88 for idx, page in enumerate(table_pages):

File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\base.py:40, in OCRInstance.of(self, document)
34 """
35 Extract text from Document to OCRDataframe object
36 :param document: Document object
37 :return: OCRDataframe object
38 """
39 # Extract content from document
---> 40 content = self.content(document=document)
42 # Create OCRDataframe from content
43 return self.to_ocr_dataframe(content=content)

File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:74, in PaddleOCR.content(self, document)
72 def content(self, document: Document) -> List[List]:
73 # Get OCR of all images
---> 74 ocrs = [self.hocr(image=image) for image in document.images]
76 return ocrs

File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:74, in <listcomp>(.0)
72 def content(self, document: Document) -> List[List]:
73 # Get OCR of all images
---> 74 ocrs = [self.hocr(image=image) for image in document.images]
76 return ocrs

File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:59, in PaddleOCR.hocr(self, image)
56 cv2.imwrite(tmp_file, image)
58 # Get OCR
---> 59 ocr_result = self.ocr.ocr(img=tmp_file, cls=False)
61 # Remove temporary file
62 while os.path.exists(tmp_file):

File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\paddleocr.py:524, in PaddleOCR.ocr(self, img, det, rec, cls)
521 img = check_img(img)
523 if det and rec:
--> 524 dt_boxes, rec_res, _ = self.__call__(img, cls)
525 return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
526 elif det and not rec:

File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\tools\infer\predict_system.py:71, in TextSystem.__call__(self, img, cls)
69 start = time.time()
70 ori_im = img.copy()
---> 71 dt_boxes, elapse = self.text_detector(img)
72 time_dict['det'] = elapse
73 logger.debug("dt_boxes num : {}, elapse : {}".format(
74 len(dt_boxes), elapse))

File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\tools\infer\predict_det.py:260, in TextDetector.__call__(self, img)
257 raise NotImplementedError
259 #self.predictor.try_shrink_memory()
--> 260 post_result = self.postprocess_op(preds, shape_list)
261 dt_boxes = post_result[0]['points']
262 if (self.det_algorithm == "SAST" and self.det_sast_polygon) or (
263 self.det_algorithm in ["PSE", "FCE"] and
264 self.postprocess_op.box_type == 'poly'):

File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\ppocr\postprocess\db_postprocess.py:240, in DBPostProcess.__call__(self, outs_dict, shape_list)
237 boxes, scores = self.polygons_from_bitmap(pred[batch_index],
238 mask, src_w, src_h)
239 else:
--> 240 boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
241 src_w, src_h)
243 boxes_batch.append({'points': boxes})
244 return boxes_batch

File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\ppocr\postprocess\db_postprocess.py:131, in DBPostProcess.boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height)
129 points = np.array(points)
130 if self.score_mode == "fast":
--> 131 score = self.box_score_fast(pred, points.reshape(-1, 2))
132 else:
133 score = self.box_score_slow(pred, contour)

File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\ppocr\postprocess\db_postprocess.py:188, in DBPostProcess.box_score_fast(self, bitmap, _box)
186 h, w = bitmap.shape[:2]
187 box = _box.copy()
--> 188 xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
189 xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
190 ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)

File ~\Desktop\Python_V10\python10\lib\site-packages\numpy\__init__.py:338, in __getattr__(attr)
333 warnings.warn(
334 f"In the future np.{attr} will be defined as the "
335 "corresponding NumPy scalar.", FutureWarning, stacklevel=2)
337 if attr in former_attrs:
--> 338 raise AttributeError(former_attrs[attr])
340 if attr == 'testing':
341 import numpy.testing as testing

AttributeError: module 'numpy' has no attribute 'int'.
np.int was a deprecated alias for the builtin int. To avoid this error in existing code, use int by itself. Doing this will not modify any behavior and is safe. When replacing np.int, you may wish to use e.g. np.int64 or np.int32 to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

我们提供了AceIssueSolver来帮助你解答问题，你是否想要它来解答(请填写yes/no)?/We provide AceIssueSolver to solve issues, do you want it? (Please write yes/no):

请尽量不要包含图片在问题中/Please try to not include the image in the issue.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AttributeError: Execute PaddleOCR / PPStructure(**params)(ImageArray) #12998

Uh oh!

{{title}}

Uh oh!

Uh oh!

{{editor}}'s edit

{{editor}}'s edit

Uh oh!

Replies: 0 comments

Select a reply

Uh oh!

AttributeError: Execute PaddleOCR / PPStructure(**params)(ImageArray) #12998

Uh oh!

Uh oh!

xxleonn Nov 16, 2023

Replies: 0 comments

xxleonn
Nov 16, 2023