Skip to content

Commit cb034aa

Browse files
authored
Merge pull request #93 from aspose-pdf-cloud/refactored-parser
Refactored Parser use cases
2 parents 5945baf + 6dc57e3 commit cb034aa

File tree

7 files changed

+278
-0
lines changed

7 files changed

+278
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"path"
6+
7+
asposepdfcloud "github.com/aspose-pdf-cloud/aspose-pdf-cloud-go/v25"
8+
)
9+
10+
func ParseExtractFormsAsFDF(pdf_api *asposepdfcloud.PdfApiService, documentName string, outputFDFName string, remoteFolder string) {
11+
// Extract Form fields from the document to FDF file
12+
uploadFile(pdf_api, documentName)
13+
14+
args := map[string]interface{}{
15+
"folder": remoteFolder,
16+
}
17+
18+
fdfPath := path.Join(remoteFolder, outputFDFName)
19+
20+
_, httpResponse, err := pdf_api.PutExportFieldsFromPdfToFdfInStorage(documentName, fdfPath, args)
21+
if err != nil {
22+
fmt.Println(err.Error())
23+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
24+
fmt.Println("ParseExtractFormsAsFDF(): Failed to extract Form fields from the document.")
25+
} else {
26+
fmt.Println("ParseExtractFormsAsFDF(): Forms fields successfully extracted from the document '" + documentName + "'.")
27+
downloadFile(pdf_api, outputFDFName, outputFDFName)
28+
}
29+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"path"
6+
7+
asposepdfcloud "github.com/aspose-pdf-cloud/aspose-pdf-cloud-go/v25"
8+
)
9+
10+
func ParseExtractFormsAsXML(pdf_api *asposepdfcloud.PdfApiService, documentName string, outputXMLName string, remoteFolder string) {
11+
// Extract Form fields from the document to XML file
12+
uploadFile(pdf_api, documentName)
13+
14+
args := map[string]interface{}{
15+
"folder": remoteFolder,
16+
}
17+
18+
xmlPath := path.Join(remoteFolder, outputXMLName)
19+
20+
_, httpResponse, err := pdf_api.PutExportFieldsFromPdfToXmlInStorage(documentName, xmlPath, args)
21+
if err != nil {
22+
fmt.Println(err.Error())
23+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
24+
fmt.Println("ParseExtractFormsAsXML(): Failed to extract Form fields from the document.")
25+
} else {
26+
fmt.Println("ParseExtractFormsAsXML(): Forms fields successfully extracted from the document '" + documentName + "'.")
27+
downloadFile(pdf_api, outputXMLName, outputXMLName)
28+
}
29+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"path"
7+
8+
asposepdfcloud "github.com/aspose-pdf-cloud/aspose-pdf-cloud-go/v25"
9+
)
10+
11+
func ParseExtractImages(pdf_api *asposepdfcloud.PdfApiService, documentName string, pageNumber int32, localFolder string, remoteFolder string) {
12+
// Extract Images from the page of PDF document
13+
uploadFile(pdf_api, documentName)
14+
15+
args := map[string]interface{}{
16+
"folder": remoteFolder,
17+
}
18+
19+
respImages, httpResponse, err := pdf_api.GetImages(documentName, pageNumber, args)
20+
if err != nil {
21+
fmt.Println(err.Error())
22+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
23+
fmt.Println("ParseExtractImages(): Failed to extract images from the page of document.")
24+
} else {
25+
for _, image := range respImages.Images.List {
26+
27+
response, httpResponse, err := pdf_api.GetImageExtractAsPng(documentName, image.Id, args)
28+
29+
if err != nil {
30+
fmt.Println(err.Error())
31+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
32+
fmt.Println("ParseExtractImages(): Failed to extract image.")
33+
} else {
34+
fmt.Println("ParseExtractImages(): Images'" + image.Id + "' successfully extracted from the page of document.")
35+
36+
fileName := path.Join(localFolder, (image.Id + ".png"))
37+
f, _ := os.Create(fileName)
38+
_, _ = f.Write(response)
39+
fmt.Println("File '" + fileName + "' successfully downloaded.")
40+
}
41+
}
42+
}
43+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"os"
7+
"path"
8+
9+
asposepdfcloud "github.com/aspose-pdf-cloud/aspose-pdf-cloud-go/v25"
10+
)
11+
12+
func ParseExtractTables(pdf_api *asposepdfcloud.PdfApiService, documentName string, localFolder string, remoteFolder string) {
13+
// Extract tables form the document
14+
uploadFile(pdf_api, documentName)
15+
16+
args := map[string]interface{}{
17+
"folder": remoteFolder,
18+
}
19+
20+
result, httpResponse, err := pdf_api.GetDocumentTables(documentName, args)
21+
if err != nil {
22+
fmt.Println(err.Error())
23+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
24+
fmt.Println("ExtractTables(): Failed to extract tables from the document.")
25+
} else {
26+
if result.Tables == nil || len(result.Tables.List) == 0 {
27+
fmt.Println("ExtractTables(): Tables not found in the document.")
28+
} else {
29+
resultJson := "[\n"
30+
for _, t := range result.Tables.List {
31+
respTable, httpResponse, err := pdf_api.GetTable(documentName, t.Id, args)
32+
if err != nil {
33+
fmt.Println(err.Error())
34+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
35+
fmt.Println("ExtractTables(): Failed to extract table from the document.")
36+
} else {
37+
fmt.Println("table", respTable.Table)
38+
jsTable, _ := json.Marshal(respTable.Table)
39+
resultJson += string(jsTable) + ",\n\n"
40+
}
41+
}
42+
resultJson += "]"
43+
fileName := path.Join(localFolder, ("parsed_tables_output.json"))
44+
f, _ := os.Create(fileName)
45+
_, _ = f.Write([]byte(resultJson))
46+
fmt.Println("File '" + fileName + "' successfully downloaded.")
47+
}
48+
}
49+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"os"
7+
"path"
8+
9+
asposepdfcloud "github.com/aspose-pdf-cloud/aspose-pdf-cloud-go/v25"
10+
)
11+
12+
func ParseExtractTextBoxes(pdf_api *asposepdfcloud.PdfApiService, documentName string, localFolder string, remoteFolder string) {
13+
// Extract tables form the document
14+
uploadFile(pdf_api, documentName)
15+
16+
args := map[string]interface{}{
17+
"folder": remoteFolder,
18+
}
19+
20+
result, httpResponse, err := pdf_api.GetDocumentTextBoxFields(documentName, args)
21+
if err != nil {
22+
fmt.Println(err.Error())
23+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
24+
fmt.Println("ParseExtractTextBoxes(): Failed to extract text boxes from the document.")
25+
} else {
26+
if result.Fields == nil || len(result.Fields.List) == 0 {
27+
fmt.Println("ParseExtractTextBoxes(): Text boxes not found in the document.")
28+
} else {
29+
resultJson := "[\n"
30+
for _, t := range result.Fields.List {
31+
respTextBox, httpResponse, err := pdf_api.GetTextBoxField(documentName, t.FullName, args)
32+
if err != nil {
33+
fmt.Println(err.Error())
34+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
35+
fmt.Println("ParseExtractTextBoxes(): Failed to extract text box from the document.")
36+
} else {
37+
fmt.Println("TextBox", respTextBox.Field)
38+
jsTable, _ := json.Marshal(respTextBox.Field)
39+
resultJson += string(jsTable) + ",\n\n"
40+
}
41+
}
42+
resultJson += "]"
43+
fileName := path.Join(localFolder, ("parsed_taext_boxes_output.json"))
44+
f, _ := os.Create(fileName)
45+
_, _ = f.Write([]byte(resultJson))
46+
fmt.Println("File '" + fileName + "' successfully downloaded.")
47+
}
48+
}
49+
}

uses_cases/parser/parser_helper.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"path"
7+
"path/filepath"
8+
9+
asposepdfcloud "github.com/aspose-pdf-cloud/aspose-pdf-cloud-go/v25"
10+
)
11+
12+
// Sample settings shared by the parser use cases.
const (
	// REMOTE_FOLDER is the storage folder used by uploadFile and downloadFile.
	REMOTE_FOLDER = "Your_Temp_Pdf_Cloud"
	// LOCAL_FOLDER is the local directory files are read from and saved to.
	LOCAL_FOLDER = "c:\\Samples"
	// PDF_DOCUMENT is the name of the input document.
	PDF_DOCUMENT = "sample.pdf"
	// PDF_OUTPUT is an output PDF file name.
	PDF_OUTPUT = "output_pages.pdf"
	// XML_OUTPUT_FILE is the file name for exported form fields in XML.
	XML_OUTPUT_FILE = "output_sample.xml"
	// FDF_OUTPUT_FILE is the file name for exported form fields in FDF.
	FDF_OUTPUT_FILE = "output_sample.fdf"
	// PAGE_NUMBER is the page passed to ParseExtractImages by main.
	PAGE_NUMBER = 1
)

// Credentials handed to the API client constructor in initPdfApi.
const (
	AppSID = "**********" // Your Application SID
	AppKey = "**********" // Your Application Key
)
24+
25+
func initPdfApi() *asposepdfcloud.PdfApiService {
26+
pdfApi := asposepdfcloud.NewPdfApiService(AppSID, AppKey, "")
27+
return pdfApi
28+
}
29+
30+
// Upload local file to the remote folder with check errors
31+
func uploadFile(pdf_api *asposepdfcloud.PdfApiService, name string) {
32+
args := map[string]interface{}{
33+
"folder": REMOTE_FOLDER,
34+
}
35+
file, err := os.Open(filepath.Join(LOCAL_FOLDER, name))
36+
if err != nil {
37+
fmt.Println(err.Error())
38+
} else {
39+
_, httpResponse, err := pdf_api.UploadFile(path.Join(REMOTE_FOLDER, name), file, args)
40+
if err != nil {
41+
fmt.Println(err.Error())
42+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
43+
fmt.Println("Unexpected error!")
44+
} else {
45+
fmt.Println("File '" + name + " ' successfully uploaded.")
46+
}
47+
}
48+
}
49+
50+
// Download file from remote folder and save it locally with check errors
51+
func downloadFile(pdf_api *asposepdfcloud.PdfApiService, name string, output_name string) {
52+
args := map[string]interface{}{
53+
"folder": REMOTE_FOLDER,
54+
}
55+
result_data, httpResponse, err := pdf_api.DownloadFile(path.Join(REMOTE_FOLDER, name), args)
56+
if err != nil {
57+
fmt.Println(err.Error())
58+
} else if httpResponse.StatusCode < 200 || httpResponse.StatusCode > 299 {
59+
fmt.Println("Unexpected error!")
60+
} else {
61+
fileName := path.Join(LOCAL_FOLDER, output_name)
62+
f, _ := os.Create(fileName)
63+
_, _ = f.Write(result_data)
64+
fmt.Println("File '" + fileName + "' successfully downloaded.")
65+
}
66+
}

uses_cases/parser/parser_launch.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package main
2+
3+
func main() {
4+
pdfApi := initPdfApi()
5+
6+
ParseExtractFormsAsXML(pdfApi, PDF_DOCUMENT, XML_OUTPUT_FILE, REMOTE_FOLDER)
7+
ParseExtractFormsAsFDF(pdfApi, PDF_DOCUMENT, FDF_OUTPUT_FILE, REMOTE_FOLDER)
8+
9+
ParseExtractImages(pdfApi, PDF_DOCUMENT, PAGE_NUMBER, LOCAL_FOLDER, REMOTE_FOLDER)
10+
ParseExtractTables(pdfApi, PDF_DOCUMENT, LOCAL_FOLDER, REMOTE_FOLDER)
11+
ParseExtractTextBoxes(pdfApi, PDF_DOCUMENT, LOCAL_FOLDER, REMOTE_FOLDER)
12+
13+
}

0 commit comments

Comments
 (0)