You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\base\__init__.py:126, in Document.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
120 tables = {idx: TableImage(img=img,
121 min_confidence=min_confidence).extract_tables(implicit_rows=implicit_rows,
122 borderless_tables=borderless_tables)
123 for idx, img in enumerate(self.images)}
125 # Update table content with OCR if possible
--> 126 tables = self.get_table_content(tables=tables,
127 ocr=ocr,
128 min_confidence=min_confidence)
130 # If pages have been defined, modify tables keys
131 if self.pages:
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\base\__init__.py:85, in Document.get_table_content(self, tables, ocr, min_confidence)
83 # Get OCRDataFrame object
84 if self.ocr_df is None and ocr is not None:
---> 85 self.ocr_df = ocr.of(document=ocr_doc)
87 # Retrieve table contents with ocr
88 for idx, page in enumerate(table_pages):
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\base.py:40, in OCRInstance.of(self, document)
34 """
35 Extract text from Document to OCRDataframe object
36 :param document: Document object
37 :return: OCRDataframe object
38 """
39 # Extract content from document
---> 40 content = self.content(document=document)
42 # Create OCRDataframe from content
43 return self.to_ocr_dataframe(content=content)
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:74, in PaddleOCR.content(self, document)
72 def content(self, document: Document) -> List[List]:
73 # Get OCR of all images
---> 74 ocrs = [self.hocr(image=image) for image in document.images]
76 return ocrs
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:74, in <listcomp>(.0)
72 def content(self, document: Document) -> List[List]:
73 # Get OCR of all images
---> 74 ocrs = [self.hocr(image=image) for image in document.images]
76 return ocrs
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:59, in PaddleOCR.hocr(self, image)
56 cv2.imwrite(tmp_file, image)
58 # Get OCR
---> 59 ocr_result = self.ocr.ocr(img=tmp_file, cls=False)
61 # Remove temporary file
62 while os.path.exists(tmp_file):
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\paddleocr.py:524, in PaddleOCR.ocr(self, img, det, rec, cls)
521 img = check_img(img)
523 if det and rec:
--> 524 dt_boxes, rec_res, _ = self.__call__(img, cls)
525 return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
526 elif det and not rec:
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\ppocr\postprocess\db_postprocess.py:188, in DBPostProcess.box_score_fast(self, bitmap, _box)
186 h, w = bitmap.shape[:2]
187 box = _box.copy()
--> 188 xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
189 xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
190 ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
File ~\Desktop\Python_V10\python10\lib\site-packages\numpy\__init__.py:338, in __getattr__(attr)
333 warnings.warn(
334 f"In the future np.{attr} will be defined as the "
335 "corresponding NumPy scalar.", FutureWarning, stacklevel=2)
337 if attr in former_attrs:
--> 338 raise AttributeError(former_attrs[attr])
340 if attr == 'testing':
341 import numpy.testing as testing
AttributeError: module 'numpy' has no attribute 'int'. np.int was a deprecated alias for the builtin int. To avoid this error in existing code, use int by itself. Doing this will not modify any behavior and is safe. When replacing np.int, you may wish to use e.g. np.int64 or np.int32 to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
我们提供了AceIssueSolver来帮助你解答问题,你是否想要它来解答(请填写yes/no)?/We provide AceIssueSolver to solve issues, do you want it? (Please write yes/no):
请尽量不要包含图片在问题中/Please try to not include the image in the issue.
This discussion was converted from issue #11265 on June 10, 2024 07:50.
Heading
Bold
Italic
Quote
Code
Link
Numbered list
Unordered list
Task list
Attach files
Mention
Reference
Menu
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
请提供下述完整信息以便快速定位问题/Please provide the following information to quickly locate the problem
from img2table.document import Image
from img2table.ocr import PaddleOCR
ocr = PaddleOCR(lang="en",
kw={
'ocr_version': 'PP-OCRv3',
'structure_version': 'PP-StructureV2',
'det_model_dir': 'C:/xxxx/PaddleOCR/ch_PP-OCRv3_det_infer',
'rec_model_dir': 'C:/xxxx/PaddleOCR/en_PP-OCRv3_rec_infer',
'cls_model_dir': 'C:/xxxx/PaddleOCR/ch_ppocr_mobile_v2.0_cls_infer',
'table_model_dir': 'C:/xxxx/PaddleOCR/en_ppstructure_mobile_v2.0_SLANet_infer',
'layout_model_dir': 'C:/xxxx/PaddleOCR/picodet_lcnet_x1_0_fgd_layout_infer',
'lang': 'en',
})
doc = Image(f"image.png")
extracted_tables = doc.extract_tables(ocr = ocr, implicit_rows = False, borderless_tables = False, min_confidence = 0)
AttributeError Traceback (most recent call last)
Cell In[11], line 16
13 doc = Img2TableImage(r"C:/xxxxx/Image.png")
15 # Table extraction
---> 16 extracted_tables = doc.extract_tables(ocr = ocr, implicit_rows = False, borderless_tables = False, min_confidence = 0)
17 doc.to_xlsx('tables1.xlsx', ocr = ocr, implicit_rows = False, borderless_tables = False, min_confidence = 0)
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\image.py:42, in Image.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
32 def extract_tables(self, ocr: "OCRInstance" = None, implicit_rows: bool = False, borderless_tables: bool = False,
33 min_confidence: int = 50) -> List[ExtractedTable]:
34 """
35 Extract tables from document
36 :param ocr: OCRInstance object used to extract table content
(...)
40 :return: list of extracted tables
41 """
---> 42 extracted_tables = super(Image, self).extract_tables(ocr=ocr,
43 implicit_rows=implicit_rows,
44 borderless_tables=borderless_tables,
45 min_confidence=min_confidence)
46 return extracted_tables.get(0)
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\base\__init__.py:126, in Document.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
120 tables = {idx: TableImage(img=img,
121 min_confidence=min_confidence).extract_tables(implicit_rows=implicit_rows,
122 borderless_tables=borderless_tables)
123 for idx, img in enumerate(self.images)}
125 # Update table content with OCR if possible
--> 126 tables = self.get_table_content(tables=tables,
127 ocr=ocr,
128 min_confidence=min_confidence)
130 # If pages have been defined, modify tables keys
131 if self.pages:
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\document\base\__init__.py:85, in Document.get_table_content(self, tables, ocr, min_confidence)
83 # Get OCRDataFrame object
84 if self.ocr_df is None and ocr is not None:
---> 85 self.ocr_df = ocr.of(document=ocr_doc)
87 # Retrieve table contents with ocr
88 for idx, page in enumerate(table_pages):
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\base.py:40, in OCRInstance.of(self, document)
34 """
35 Extract text from Document to OCRDataframe object
36 :param document: Document object
37 :return: OCRDataframe object
38 """
39 # Extract content from document
---> 40 content = self.content(document=document)
42 # Create OCRDataframe from content
43 return self.to_ocr_dataframe(content=content)
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:74, in PaddleOCR.content(self, document)
72 def content(self, document: Document) -> List[List]:
73 # Get OCR of all images
---> 74 ocrs = [self.hocr(image=image) for image in document.images]
76 return ocrs
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:74, in <listcomp>(.0)
72 def content(self, document: Document) -> List[List]:
73 # Get OCR of all images
---> 74 ocrs = [self.hocr(image=image) for image in document.images]
76 return ocrs
File ~\Desktop\Python_V10\python10\lib\site-packages\img2table\ocr\paddle.py:59, in PaddleOCR.hocr(self, image)
56 cv2.imwrite(tmp_file, image)
58 # Get OCR
---> 59 ocr_result = self.ocr.ocr(img=tmp_file, cls=False)
61 # Remove temporary file
62 while os.path.exists(tmp_file):
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\paddleocr.py:524, in PaddleOCR.ocr(self, img, det, rec, cls)
521 img = check_img(img)
523 if det and rec:
--> 524 dt_boxes, rec_res, _ = self.__call__(img, cls)
525 return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
526 elif det and not rec:
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\tools\infer\predict_system.py:71, in TextSystem.__call__(self, img, cls)
69 start = time.time()
70 ori_im = img.copy()
---> 71 dt_boxes, elapse = self.text_detector(img)
72 time_dict['det'] = elapse
73 logger.debug("dt_boxes num : {}, elapse : {}".format(
74 len(dt_boxes), elapse))
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\tools\infer\predict_det.py:260, in TextDetector.__call__(self, img)
257 raise NotImplementedError
259 #self.predictor.try_shrink_memory()
--> 260 post_result = self.postprocess_op(preds, shape_list)
261 dt_boxes = post_result[0]['points']
262 if (self.det_algorithm == "SAST" and self.det_sast_polygon) or (
263 self.det_algorithm in ["PSE", "FCE"] and
264 self.postprocess_op.box_type == 'poly'):
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\ppocr\postprocess\db_postprocess.py:240, in DBPostProcess.__call__(self, outs_dict, shape_list)
237 boxes, scores = self.polygons_from_bitmap(pred[batch_index],
238 mask, src_w, src_h)
239 else:
--> 240 boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
241 src_w, src_h)
243 boxes_batch.append({'points': boxes})
244 return boxes_batch
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\ppocr\postprocess\db_postprocess.py:131, in DBPostProcess.boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height)
129 points = np.array(points)
130 if self.score_mode == "fast":
--> 131 score = self.box_score_fast(pred, points.reshape(-1, 2))
132 else:
133 score = self.box_score_slow(pred, contour)
File ~\Desktop\Python_V10\python10\lib\site-packages\paddleocr\ppocr\postprocess\db_postprocess.py:188, in DBPostProcess.box_score_fast(self, bitmap, _box)
186 h, w = bitmap.shape[:2]
187 box = _box.copy()
--> 188 xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
189 xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
190 ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
File ~\Desktop\Python_V10\python10\lib\site-packages\numpy\__init__.py:338, in __getattr__(attr)
333 warnings.warn(
334 f"In the future np.{attr} will be defined as the "
335 "corresponding NumPy scalar.", FutureWarning, stacklevel=2)
337 if attr in former_attrs:
--> 338 raise AttributeError(former_attrs[attr])
340 if attr == 'testing':
341 import numpy.testing as testing
AttributeError: module 'numpy' has no attribute 'int'. np.int was a deprecated alias for the builtin int. To avoid this error in existing code, use int by itself. Doing this will not modify any behavior and is safe. When replacing np.int, you may wish to use e.g. np.int64 or np.int32 to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
我们提供了AceIssueSolver来帮助你解答问题,你是否想要它来解答(请填写yes/no)?/We provide AceIssueSolver to solve issues, do you want it? (Please write yes/no):
请尽量不要包含图片在问题中/Please try to not include the image in the issue.
Beta Was this translation helpful? Give feedback.
All reactions