Skip to content

Commit 22ddec8

Browse files
authored
Merge pull request #116 from aspose-pdf-cloud/develop
update to 25.9
2 parents e31093b + eb7cc82 commit 22ddec8

File tree

14 files changed

+395
-8
lines changed

14 files changed

+395
-8
lines changed

README.md

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,10 @@ XLS, XLSX, PPTX, DOC, DOCX, MobiXML, JPEG, EMF, PNG, BMP, GIF, TIFF, Text
3030
## Read PDF Formats
3131
MHT, PCL, PS, XSLFO, MD
3232

33-
## Enhancements in Version 25.8
34-
- Implement document page resize functionality using the Pdf.Cloud API library.
33+
## Enhancements in Version 25.9
34+
- Implement PDF document page crop functionality using the Pdf.Cloud API library.
3535
- A new version of Aspose.PDF Cloud was prepared using the latest version of Aspose.PDF for .NET.
3636

37-
## Bugs fixed in Version 25.8
38-
- Implement delete watermark from PDF document using the Pdf.Cloud API library.
39-
4037
## Requirements.
4138
Python 2.7 and 3.4+
4239

Uses-Cases/Parser/get_fdf.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from paresr_helpers import ParesrHelper
2+
from pathlib import Path
3+
import logging
4+
5+
class ExportFormToFDF:
    """Export the form fields of a PDF document to an FDF file via Aspose PDF Cloud.

    The instance only stores the helper it is given; the helper supplies the
    ``pdf_api`` client plus the ``upload_document`` / ``downloadFile`` methods
    used by :meth:`Extract`.
    """

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, outputFDFName: str, localFolder: Path, remoteFolder: str):
        """Upload the document, export its form fields to FDF and download the result.

        :param documentName: name of the PDF document, handed to
            ``helper.upload_document`` and to the export call.
        :param outputFDFName: file name of the FDF file to create in ``remoteFolder``.
        :param localFolder: local folder handed to ``helper.downloadFile``.
        :param remoteFolder: storage folder used for the upload, as the ``folder``
            option and as the parent of the FDF output path.
        :return: None. Failures are reported through ``logging``.
        """
        api = self.helper.pdf_api
        if api is None:
            # The helper leaves pdf_api as None when credentials could not be
            # loaded; report that instead of failing with AttributeError below.
            logging.error("ExportFormToFDF(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        # as_posix() keeps '/' separators for the storage path even on
        # Windows, where str(Path(...)) would produce backslashes.
        fdfPath = Path(remoteFolder, outputFDFName).as_posix()
        opts = {
            "folder": remoteFolder
        }
        response = api.put_export_fields_from_pdf_to_fdf_in_storage(documentName, fdfPath, **opts)
        if response.code != 200:
            logging.error("ExportFormToFDF(): Unexpected error!")
        else:
            logging.info(f"ExportFormToFDF(): Pdf document '{documentName}' form fields successfully exported to '{outputFDFName}' file.")
            self.helper.downloadFile(outputFDFName, outputFDFName, localFolder, remoteFolder, "")

Uses-Cases/Parser/get_images.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from paresr_helpers import ParesrHelper
2+
from pathlib import Path
3+
import shutil
4+
import logging
5+
6+
class GetImages:
    """Extract the images of one PDF page as PNG files via Aspose PDF Cloud.

    The instance only stores the helper it is given; the helper supplies the
    ``pdf_api`` client and the ``upload_document`` method used by :meth:`Extract`.
    """

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, pageNumber: int, localFolder: Path, remoteFolder: str):
        """Upload the document and save every image of ``pageNumber`` as ``<image id>.png``.

        :param documentName: name of the PDF document to upload and query.
        :param pageNumber: page number passed to ``get_images``.
        :param localFolder: local folder that receives the PNG files.
        :param remoteFolder: storage folder used for the upload and as the
            ``folder`` option of every API call.
        :return: None. Failures are reported through ``logging``.
        """
        api = self.helper.pdf_api
        if api is None:
            # The helper leaves pdf_api as None when credentials could not be
            # loaded; report that instead of failing with AttributeError below.
            logging.error("GetImages(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {
            "folder": remoteFolder
        }
        respImages = api.get_images(documentName, pageNumber, **opts)
        if respImages.code != 200:
            logging.error("GetImages(): Unexpected error!")
            return

        # Accept a plain string as well as a Path for the target folder.
        target_dir = Path(localFolder)
        for img in respImages.images.list:
            # presumably the call returns the path of a downloaded temp file,
            # since the result is handed to shutil.move — confirm against the SDK.
            response = api.get_image_extract_as_png(documentName, img.id, **opts)
            local_path = target_dir / (img.id + '.png')
            shutil.move(response, str(local_path))
            # Report success only once the file is actually in place.
            logging.info(f"GetImages(): Images '{img.id}' successfully extracted from the document '{documentName}'.")

Uses-Cases/Parser/get_tables.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from paresr_helpers import ParesrHelper
2+
from pathlib import Path
3+
import json
4+
import logging
5+
6+
class GetTables:
    """Extract the tables of a PDF document into a local file via Aspose PDF Cloud.

    The instance only stores the helper it is given; the helper supplies the
    ``pdf_api`` client and the ``upload_document`` method used by :meth:`Extract`.
    """

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, localFolder: Path, remoteFolder: str):
        """Upload the document and write each table to ``tables_objects.json``.

        Every table listed by ``get_document_tables`` is re-requested with
        ``get_table``; when that call answers with code 200 the listed table
        object is dumped with ``json.dump(..., default=str)`` followed by a
        separator line.

        :param documentName: name of the PDF document to upload and query.
        :param localFolder: local folder that receives ``tables_objects.json``.
        :param remoteFolder: storage folder used for the upload and as the
            ``folder`` option of every API call.
        :return: None. Failures are reported through ``logging``.
        """
        api = self.helper.pdf_api
        if api is None:
            # The helper leaves pdf_api as None when credentials could not be
            # loaded; report that instead of failing with AttributeError below.
            logging.error("GetTables(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {
            "folder": remoteFolder
        }
        respTables = api.get_document_tables(documentName, **opts)
        if respTables.code != 200:
            logging.error("GetTables(): Unexpected error!")
            return

        # Accept a plain string as well as a Path for the target folder.
        localJson = Path(localFolder) / "tables_objects.json"
        with open(str(localJson), "w", encoding="utf-8") as localFile:
            for tab in respTables.tables.list:
                response = api.get_table(documentName, tab.id, **opts)
                if response.code != 200:
                    logging.error("GetTables(): Unexpected error!")
                else:
                    logging.info(f"GetTables(): Table '{tab.id}' successfully extracted from the document '{documentName}'.")
                    # NOTE(review): the listed item is dumped, not the
                    # get_table() response — confirm that this is intended.
                    json.dump(tab, localFile, ensure_ascii=False, default=str)
                    localFile.write("\n*********************\n")

Uses-Cases/Parser/get_textboxes.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from paresr_helpers import ParesrHelper
2+
from pathlib import Path
3+
import json
4+
import logging
5+
6+
class GetTextBoxes:
    """Extract the text box fields of a PDF document into a local file via Aspose PDF Cloud.

    The instance only stores the helper it is given; the helper supplies the
    ``pdf_api`` client and the ``upload_document`` method used by :meth:`Extract`.
    """

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, localFolder: Path, remoteFolder: str):
        """Upload the document and write each text box field to ``text_box_objects.json``.

        Every field listed by ``get_document_text_box_fields`` is re-requested
        with ``get_text_box_field``; when that call answers with code 200 the
        listed field object is dumped with ``json.dump(..., default=str)``
        followed by a separator line.

        :param documentName: name of the PDF document to upload and query.
        :param localFolder: local folder that receives ``text_box_objects.json``.
        :param remoteFolder: storage folder used for the upload and as the
            ``folder`` option of every API call.
        :return: None. Failures are reported through ``logging``.
        """
        api = self.helper.pdf_api
        if api is None:
            # The helper leaves pdf_api as None when credentials could not be
            # loaded; report that instead of failing with AttributeError below.
            logging.error("GetTextBoxes(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {
            "folder": remoteFolder
        }
        respTextBoxes = api.get_document_text_box_fields(documentName, **opts)
        if respTextBoxes.code != 200:
            logging.error("GetTextBoxes(): Unexpected error!")
            return

        # Accept a plain string as well as a Path for the target folder.
        localJson = Path(localFolder) / "text_box_objects.json"
        with open(str(localJson), "w", encoding="utf-8") as localFile:
            for textBox in respTextBoxes.fields.list:
                response = api.get_text_box_field(documentName, textBox.full_name, **opts)
                if response.code != 200:
                    logging.error("GetTextBoxes(): Unexpected error!")
                else:
                    logging.info(f"GetTextBoxes(): TextBox field '{textBox.full_name}' successfully extracted from the document '{documentName}'.")
                    # NOTE(review): the listed item is dumped, not the
                    # get_text_box_field() response — confirm that this is intended.
                    json.dump(textBox, localFile, ensure_ascii=False, default=str)
                    localFile.write("\n*********************\n")

Uses-Cases/Parser/get_xml.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from paresr_helpers import ParesrHelper, Config
2+
from pathlib import Path
3+
import logging
4+
5+
class ExportFormToXXML:
    """Export the form fields of a PDF document to an XML file via Aspose PDF Cloud.

    The instance only stores the helper it is given; the helper supplies the
    ``pdf_api`` client plus the ``upload_document`` / ``downloadFile`` methods
    used by :meth:`Extract`.
    """

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, outputXMLName: str, localFolder: Path, remoteFolder: str):
        """Upload the document, export its form fields to XML and download the result.

        :param documentName: name of the PDF document, handed to
            ``helper.upload_document`` and to the export call.
        :param outputXMLName: file name of the XML file to create in ``remoteFolder``.
        :param localFolder: local folder handed to ``helper.downloadFile``.
        :param remoteFolder: storage folder used for the upload, as the ``folder``
            option and as the parent of the XML output path.
        :return: None. Failures are reported through ``logging``.
        """
        api = self.helper.pdf_api
        if api is None:
            # The helper leaves pdf_api as None when credentials could not be
            # loaded; report that instead of failing with AttributeError below.
            logging.error("ExportFormToXML(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        # as_posix() keeps '/' separators for the storage path even on
        # Windows, where str(Path(...)) would produce backslashes.
        xmlPath = Path(remoteFolder, outputXMLName).as_posix()
        opts = {
            "folder": remoteFolder
        }
        response = api.put_export_fields_from_pdf_to_xml_in_storage(documentName, xmlPath, **opts)
        if response.code != 200:
            logging.error("ExportFormToXML(): Unexpected error!")
        else:
            logging.info(f"ExportFormToXML(): Pdf document '{documentName}' form fields successfully exported to '{outputXMLName}' file.")
            self.helper.downloadFile(outputXMLName, outputXMLName, localFolder, remoteFolder, "")
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import shutil
2+
import json
3+
import logging
4+
from pathlib import Path
5+
from asposepdfcloud import ApiClient, PdfApi
6+
7+
# Configure logging
8+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
9+
10+
11+
class Config:
    """Configuration parameters."""
    # JSON file opened by ParesrHelper._init_api to read the "key" and "id" entries.
    # NOTE(review): this is a raw string, so "\\" is two literal backslashes, and
    # the "..s" component looks garbled — confirm the intended relative path.
    CREDENTIALS_FILE = Path(r"..s\\credentials.json")
    # Local folder: upload_document reads source files from here and the launch
    # script passes it as the output folder.
    LOCAL_FOLDER = Path(r"C:\Samples")
    # Storage folder name passed as remoteFolder by the launch script.
    REMOTE_TEMP_FOLDER = "TempPdfCloud"
    # Name of the PDF document every sample works on.
    PDF_DOCUMENT_NAME = "sample.pdf"
    # Output file name passed to the XML export sample.
    XML_OUTPUT_FILE = "output_sample.xml"
    # Output file name passed to the FDF export sample.
    FDF_OUTPUT_FILE = "output_sample.fdf"
    # Not referenced anywhere in the visible code.
    LOCAL_RESULT_DOCUMENT_NAME = "output_sample.pdf"
    # Page number passed to the image extraction sample.
    PAGE_NUMBER = 1
21+
22+
23+
class ParesrHelper:
    """Helper that owns the PDF Cloud client and moves files to and from storage.

    ``pdf_api`` is a ``PdfApi`` instance once credentials were loaded
    successfully and stays ``None`` otherwise; both transfer methods log an
    error and do nothing in that case.
    """

    def __init__(self, credentials_file: Path = Config.CREDENTIALS_FILE):
        self.pdf_api = None
        self._init_api(credentials_file)

    def _init_api(self, credentials_file: Path):
        """Create ``self.pdf_api`` from the "key" and "id" entries of a JSON file.

        A missing file, invalid JSON or a missing/empty entry is logged and
        leaves ``self.pdf_api`` as ``None``; no exception is propagated for
        these cases.
        """
        try:
            with credentials_file.open("r", encoding="utf-8") as file:
                credentials = json.load(file)
            api_key, app_id = credentials.get("key"), credentials.get("id")
            if not api_key or not app_id:
                raise ValueError("Error: Missing API keys in the credentials file.")
            self.pdf_api = PdfApi(ApiClient(api_key, app_id))
        except (FileNotFoundError, json.JSONDecodeError, ValueError) as e:
            logging.error(f"Failed to load credentials: {e}")

    def upload_document(self, documentName: str, remoteFolder: str):
        """Upload ``Config.LOCAL_FOLDER / documentName`` to storage.

        :param documentName: file name inside ``Config.LOCAL_FOLDER``.
        :param remoteFolder: storage folder; ``None`` uploads under the bare
            document name.
        :return: None. Any exception raised by the upload is logged, not raised.
        """
        if not self.pdf_api:
            logging.error("upload_document(): API client is not initialized.")
            return

        file_path = Config.LOCAL_FOLDER / documentName
        if remoteFolder is None:
            remote_path = documentName
        else:
            remote_path = remoteFolder + '/' + documentName
        try:
            # The local path is passed as a string in both cases.
            self.pdf_api.upload_file(remote_path, str(file_path))
            logging.info(f"File {documentName} uploaded successfully.")
        except Exception as e:
            logging.error(f"Failed to upload file: {e}")

    def downloadFile(self, document: str, outputDocument: str, localFolder: Path, remoteFolder: str, output_prefix: str):
        """Download ``remoteFolder/document`` and move it into ``localFolder``.

        :param document: file name inside ``remoteFolder``.
        :param outputDocument: local file name, prefixed with ``output_prefix``.
        :param localFolder: local target folder.
        :param remoteFolder: storage folder that holds ``document``.
        :param output_prefix: text prepended to ``outputDocument``.
        :return: None. Any exception raised on the way is logged, not raised.
        """
        if not self.pdf_api:
            logging.error("downloadFile(): API client is not initialized.")
            return

        try:
            # presumably download_file returns the path of a temporary local
            # file, since the result is handed to shutil.move — confirm.
            temp_file = self.pdf_api.download_file(remoteFolder + '/' + document)
            local_path = Path(localFolder) / (output_prefix + outputDocument)
            shutil.move(temp_file, str(local_path))
            logging.info(f"downloadFile(): File successfully downloaded: {local_path}")
        except Exception as e:
            logging.error(f"downloadFile(): Failed to download file: {e}")
66+

Uses-Cases/Parser/parser_launch.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from paresr_helpers import ParesrHelper, Config
2+
from get_xml import ExportFormToXXML
3+
from get_fdf import ExportFormToFDF
4+
from get_images import GetImages
5+
from get_tables import GetTables
6+
from get_textboxes import GetTextBoxes
7+
8+
def main():
    """Run every Parser sample, in order, against the configured PDF document."""
    helper = ParesrHelper(Config.CREDENTIALS_FILE)

    # Form fields -> XML
    ExportFormToXXML(helper).Extract(
        Config.PDF_DOCUMENT_NAME, Config.XML_OUTPUT_FILE, Config.LOCAL_FOLDER, Config.REMOTE_TEMP_FOLDER
    )

    # Form fields -> FDF
    ExportFormToFDF(helper).Extract(
        Config.PDF_DOCUMENT_NAME, Config.FDF_OUTPUT_FILE, Config.LOCAL_FOLDER, Config.REMOTE_TEMP_FOLDER
    )

    # Images of the configured page
    GetImages(helper).Extract(
        Config.PDF_DOCUMENT_NAME, Config.PAGE_NUMBER, Config.LOCAL_FOLDER, Config.REMOTE_TEMP_FOLDER
    )

    # Tables
    GetTables(helper).Extract(
        Config.PDF_DOCUMENT_NAME, Config.LOCAL_FOLDER, Config.REMOTE_TEMP_FOLDER
    )

    # Text box fields
    GetTextBoxes(helper).Extract(
        Config.PDF_DOCUMENT_NAME, Config.LOCAL_FOLDER, Config.REMOTE_TEMP_FOLDER
    )


if __name__ == "__main__":
    main()

asposepdfcloud/api_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def __init__(self, app_key, app_sid, host=None, self_host=False):
8383
self.rest_client = RESTClientObject()
8484
self.default_headers = {}
8585
self.default_headers['x-aspose-client'] = 'python sdk'
86-
self.default_headers['x-aspose-client-version'] = '25.8.0'
86+
self.default_headers['x-aspose-client-version'] = '25.9.0'
8787

8888
self.self_host = self_host
8989
self.app_key = app_key

asposepdfcloud/apis/pdf_api.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24907,6 +24907,136 @@ def post_document_page_number_stamps_with_http_info(self, name, stamp, **kwargs)
2490724907
_request_timeout=params.get('_request_timeout'),
2490824908
collection_formats=collection_formats)
2490924909

24910+
def post_document_pages_crop(self, name, pages, rect, **kwargs):
    """
    Crop PDF document pages.

    Convenience wrapper around `post_document_pages_crop_with_http_info`
    that forces `_return_http_data_only=True`. The request is synchronous
    by default; pass a `callback` function to make it asynchronous.

    >>> def callback_function(response):
    >>>     pprint(response)
    >>>
    >>> thread = api.post_document_pages_crop(name, pages, rect, callback=callback_function)

    :param callback function: The callback function
        for asynchronous request. (optional)
    :param str name: The document name. (required)
    :param str pages: Comma separated list of pages and page ranges. (Example: 1,3-5,8) (required)
    :param Rectangle rect: Rectangle of document area. (required)
    :param str storage: The document storage.
    :param str folder: The document folder.
    :param str password: Base64 encoded password.
    :return: AsposeResponse
             If the method is called asynchronously,
             returns the request thread.
    """
    kwargs['_return_http_data_only'] = True
    # With or without a callback the delegate call is the same, so its
    # result is returned directly.
    return self.post_document_pages_crop_with_http_info(name, pages, rect, **kwargs)
24939+
24940+
def post_document_pages_crop_with_http_info(self, name, pages, rect, **kwargs):
    """
    Crop PDF document pages.

    Validates the arguments, then sends `POST /pdf/{name}/crop` through
    `self.api_client.call_api` with `rect` as the request body. The request
    is synchronous by default; pass a `callback` function to make it
    asynchronous.

    >>> def callback_function(response):
    >>>     pprint(response)
    >>>
    >>> thread = api.post_document_pages_crop_with_http_info(name, pages, rect, callback=callback_function)

    :param callback function: The callback function
        for asynchronous request. (optional)
    :param str name: The document name. (required)
    :param str pages: Comma separated list of pages and page ranges. (Example: 1,3-5,8) (required)
    :param Rectangle rect: Rectangle of document area. (required)
    :param str storage: The document storage.
    :param str folder: The document folder.
    :param str password: Base64 encoded password.
    :return: AsposeResponse
             If the method is called asynchronously,
             returns the request thread.
    :raises TypeError: if an unknown keyword argument is passed.
    :raises ValueError: if `name`, `pages` or `rect` is None.
    """
    accepted = (
        'name', 'pages', 'rect', 'storage', 'folder', 'password',
        'callback', '_return_http_data_only', '_preload_content',
        '_request_timeout',
    )

    # Unknown keyword arguments are rejected before the required checks.
    for option in kwargs:
        if option not in accepted:
            raise TypeError(
                "Got an unexpected keyword argument '%s'"
                " to method post_document_pages_crop" % option
            )

    # Required parameters, checked in declaration order.
    for label, value in (('name', name), ('pages', pages), ('rect', rect)):
        if value is None:
            raise ValueError(
                "Missing the required parameter `%s` when calling `post_document_pages_crop`" % label
            )

    # Optional query parameters are sent only when the caller supplied them.
    query_params = [('pages', pages)]
    query_params.extend(
        (option, kwargs[option])
        for option in ('storage', 'folder', 'password')
        if option in kwargs
    )

    header_params = {
        # HTTP header `Accept`
        'Accept': self.api_client.select_header_accept(['application/json']),
        # HTTP header `Content-Type`
        'Content-Type': self.api_client.select_header_content_type(['application/json']),
    }

    return self.api_client.call_api(
        '/pdf/{name}/crop', 'POST',
        {'name': name},
        query_params,
        header_params,
        body=rect,
        post_params=[],
        files={},
        response_type='AsposeResponse',
        auth_settings=['JWT'],
        callback=kwargs.get('callback'),
        _return_http_data_only=kwargs.get('_return_http_data_only'),
        _preload_content=kwargs.get('_preload_content', True),
        _request_timeout=kwargs.get('_request_timeout'),
        collection_formats={})
25039+
2491025040
def post_document_pages_resize(self, name, height, width, pages, **kwargs):
2491125041
"""
2491225042
Rsize PDF document.

0 commit comments

Comments
 (0)