-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
161 lines (130 loc) · 8.4 KB
/
app.py
File metadata and controls
161 lines (130 loc) · 8.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from flask import Flask, jsonify,request
import pdfplumber
import pandas as pd
import re
# Flask application object; the /validate route below registers on it via @app.route.
app = Flask(__name__)
# Capture group for a monetary figure with any number of comma groups
# (e.g. "1,23,456"). Decimals are deliberately not captured so values that
# already parsed correctly keep their previous shape.
_AMOUNT_GROUP = r'(\d+(?:,\d+)*)'


def _search_field(pattern, text, default, group=1):
    """Return capture *group* of the first match of *pattern* in *text*.

    Falls back to *default* when the pattern does not occur, so each caller
    chooses its own placeholder ("NIL" for text columns, 0 for numeric ones).
    """
    match = re.search(pattern, text)
    return match.group(group) if match else default


def _read_pdf_text(file):
    """Return the text of every page of the PDF *file*, concatenated in order."""
    with pdfplumber.open(file) as pdf:
        # `or ""` keeps the join safe if a page yields None/empty text
        # (e.g. an image-only page) instead of raising TypeError.
        return "".join(page.extract_text() or "" for page in pdf.pages)


def _build_invoice_row(text):
    """Parse invoice fields out of *text* and return one output row as a dict.

    Text columns default to "NIL" and numeric columns to 0 when the
    corresponding pattern is not present in the document.
    """
    nil = "NIL"

    invoice_number = _search_field(r'Invoice No\. (\w+)', text, nil)
    invoice_date = _search_field(r'Invoice Date (\d{2}-[A-Z]{3}-\d{2})', text, nil)
    # NOTE(review): this pattern requires digits at positions 13 and 15 of the
    # GSTIN; confirm whether identifiers with letters there must be supported.
    gstin = _search_field(r'\b\d{2}[A-Z]{5}\d{4}[A-Z]\d[Z]\d\b', text, nil, group=0)
    place_of_supply_state = _search_field(r'Place of supply \(State\) (\w+)', text, nil)
    bill_to_name = _search_field(r'Name\s+(.*?)\s+(?:Credit|Debit) Note Date', text, nil)
    company_name = _search_field(
        r"\b\w+\s+\w+\s+\w+\s+(Limited|limited|Pvt|PVT)\b", text, nil, group=0
    )
    # Address is taken as the single line that follows a line ending in "Limited".
    company_add = _search_field(r'(?<=Limited\n).*', text, nil, group=0)
    site_gstin = _search_field(r"GSTN\s+(\w+)", text, nil)
    description_of_service = _search_field(r'Description of Service\s*(.+)', text, nil)

    # SAC code
    company_sac = _search_field(r'SAC\s*Code\s*(\d{5,6})', text, 0)

    # Tax rates (optionally fractional, optionally followed by '%')
    company_cgst = _search_field(r"CGST Rate\s*(\d+(\.\d+)?%?)", text, 0)
    company_sgst = _search_field(r"SGST Rate\s*(\d+(\.\d+)?%?)", text, 0)
    company_igst = _search_field(r"IGST Rate\s*(\d+(\.\d+)?%?)", text, 0)

    # Tax and invoice amounts
    company_cgst_amount = _search_field(r"CGST\s*Amount\s*" + _AMOUNT_GROUP, text, 0)
    company_sgst_amount = _search_field(r"SGST\s*Amount\s*" + _AMOUNT_GROUP, text, 0)
    company_igst_amount = _search_field(r"IGST\s*Amount\s*" + _AMOUNT_GROUP, text, 0)
    company_amount = _search_field(
        r'Total Invoice Value \(in figures\) ' + _AMOUNT_GROUP, text, 0
    )
    # Falls back to 0 like every other numeric column (previously leaked the
    # internal "Not found" marker into the CSV).
    net_tax = _search_field(r'Net taxable value\s*' + _AMOUNT_GROUP, text, 0)

    return {
        "COMPANY_NAME": bill_to_name,
        "STATE": place_of_supply_state,
        "COMPANY_GSTN": gstin,
        "INVOICE_SOURCE": None,
        "SUPPLIER_NAME": company_name,
        "SUPPLIER_ADDRESS": company_add,
        "SUPPLIER_CITY": company_add,  # no separate city extraction; reuses the address line
        "SUPPLIER_CITY_PINCODE": 0,
        "SITE_NAME": bill_to_name,
        "SITE_STATE": place_of_supply_state,
        "SITE_STATE_CODE": "NIL",
        "SITE_GSTIN": site_gstin,
        "INVOICE_TYPE": "B2B",  # Placeholder value
        "SUPPLIER_INVOICE_NO": invoice_number,
        "SUPPLIER_INVOICE_DATE": invoice_date,
        "QTY": 0,  # Placeholder value
        "UOM": 0,
        "UNIT_PRICE": 0,  # Placeholder value
        "SUPPLIER_TAXABLE_VALUE_INR": net_tax,
        "CGST_RATE": company_cgst,
        "CGST_AMT": company_cgst_amount,
        "SGST_RATE": company_sgst,
        "SGST_AMT": company_sgst_amount,
        "IGST_RATE": company_igst,
        "IGST_AMT": company_igst_amount,
        "IRN_CODE": 0,  # Placeholder value
        "EINV_STATUS": 0,  # Placeholder value
        "IRN_CREATION_DATE": 0,  # Placeholder value
        "INVOICE_AMOUNT_INR": company_amount,
        "INVOICE_CURRENCY_CODE": "INR",  # Placeholder value
        "INVOICE_AMOUNT": company_amount,
        "HSNSAC_CODE": company_sac,
        "ITEM_DESCRIPTION": description_of_service,
    }


@app.route("/validate", methods=["POST"])
def get_details():
    """Extract invoice fields from an uploaded Vistara PDF and write them to CSV.

    Expects a multipart POST with:
      * form field ``airline`` (or ``Airline``) whose value is "vistara",
        compared case-insensitively;
      * file field ``file`` holding a ``.pdf`` upload.

    On success, writes a one-row ``output.csv`` (relative path, overwritten on
    every call) and returns a JSON body naming that file. On any validation
    failure or exception, returns a plain-text message describing the problem.

    NOTE(review): failure responses are returned without an explicit HTTP
    status, as before, to stay compatible with existing callers; consider
    moving to 4xx/5xx codes once clients are confirmed to handle them.
    """
    try:
        if 'airline' not in request.form and 'Airline' not in request.form:
            return 'Airline name is missing'
        airlines_name = request.form.get('airline') or request.form.get('Airline')
        if not airlines_name:
            return 'Airlines name is missing'
        if airlines_name.lower() != 'vistara':
            return 'This api only for vistara'

        if 'file' not in request.files:
            return 'No file part'
        upload = request.files['file']
        # `not` also covers a filename of None, not just the empty string.
        if not upload.filename:
            return 'No selected file'
        # Case-insensitive so "INVOICE.PDF" is accepted too.
        if upload.filename.split('.')[-1].lower() != 'pdf':
            return "Only read pdf files"

        row = _build_invoice_row(_read_pdf_text(upload))
        pd.DataFrame([row]).to_csv("output.csv", index=False)
        return jsonify({"message": "Data extracted successfully", "output_csv": "output.csv"})
    except Exception as e:
        # Top-level boundary: record the traceback server-side, then keep the
        # existing contract of replying with the error text.
        app.logger.exception("Invoice extraction failed")
        return str(e)
# Start the server only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode; this looks intended
    # for local development only — confirm it is not used for a deployed service.
    app.run(debug=True)