Skip to content

Commit d89dc58

Browse files
committed
Merge branch 'sameer'
2 parents 225b98c + 707acbf commit d89dc58

File tree

446 files changed

+14329
-1454
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

446 files changed

+14329
-1454
lines changed

.DS_Store

6 KB
Binary file not shown.

Backend/NotesToText.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
from fastapi import APIRouter
3+
from pdf2image import convert_from_path
4+
from google.cloud import vision
5+
6+
# Module-level router; the /NotesToText endpoint defined below is registered on it.
7+
router = APIRouter()
8+
9+
def pdf_to_images(pdf_path, output_folder):
    """Render every page of a PDF to a JPEG file inside ``output_folder``.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Directory that receives the page images. It is
            created (including missing parents) when it does not exist.

    Returns:
        A ``(image_paths, noImg)`` tuple: the paths of the saved files in
        page order (``page_1.jpeg``, ``page_2.jpeg``, ...) and the number
        of pages written. A PDF that yields no pages gives ``([], 0)``.
    """
    # Convert PDF pages to images
    images = convert_from_path(pdf_path)

    # exist_ok=True replaces the separate exists() test, which could race
    # with another process creating the same folder.
    os.makedirs(output_folder, exist_ok=True)

    # Save each image in the specified output folder
    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(output_folder, f'page_{page_number}.jpeg')
        image.save(image_path, 'JPEG')
        image_paths.append(image_path)

    # Count from the list, not from the loop variable: the loop variable is
    # never bound when the PDF produces no pages.
    return image_paths, len(image_paths)
26+
27+
@router.get("/NotesToText")
def NotesToText_handler():
    """Run OCR over the four module note PDFs and save the text to disk.

    For each module 1-4 the PDF ``Local_Storage/notes_pdf/module_<n>.pdf``
    is rendered to JPEG pages under ``images/Notes_images/module_<n>``,
    each page is sent to Google Cloud Vision text detection, the
    "Scanned by CamScanner" watermark text is removed, and the combined
    text is written to ``Local_Storage/notes_txt/module_<n>.txt``.

    Raises:
        Exception: If the Vision API reports an error for any page. The
            check happens before the page's annotations are used, so no
            text file is written for the module that failed.
    """
    substring_to_remove = "Scanned by CamScanner"

    # Credentials and client do not depend on the module being processed,
    # so they are set up once instead of on every loop iteration.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'Files/client_file_vision.json'
    client = vision.ImageAnnotatorClient()

    for i in range(4):
        print(f"converting module-{i+1}....")
        pdf_path = f'Local_Storage/notes_pdf/module_{i+1}.pdf'
        output_folder = f'images/Notes_images/module_{i+1}'

        # Convert the PDF to images and save them in the output folder
        image_paths, noImg = pdf_to_images(pdf_path, output_folder)
        print(noImg)

        page_texts = []
        # Read back exactly the files that were just written. Rebuilding the
        # path by hand previously used "Module_" with a capital M, which does
        # not match the output folder on case-sensitive filesystems.
        for image_path in image_paths:
            with open(image_path, 'rb') as image_file:
                content = image_file.read()

            image = vision.Image(content=content)
            response = client.text_detection(image=image)

            # Check for an API error before touching the annotations.
            if response.error.message:
                raise Exception(
                    '{}\nFor more info on error messages, check: '
                    'https://cloud.google.com/apis/design/errors'.format(
                        response.error.message))

            # A page with no detected text has no annotations; skip it
            # instead of failing on index 0.
            if not response.text_annotations:
                continue

            text = str(response.text_annotations[0].description)
            page_texts.append(text.replace(substring_to_remove, ""))

        # The leading space keeps the file content identical to what the
        # original accumulator (initialised to " ") produced.
        image_contents = " " + "".join(page_texts)

        output_file = f"Local_Storage/notes_txt/module_{i+1}.txt"
        # Write the text content to the output file. An explicit encoding
        # keeps non-ASCII OCR output from depending on the platform default.
        with open(output_file, "w", encoding="utf-8") as file:
            file.write(image_contents)
        print(f"module-{i+1} completed")
67+
68+
69+
1.98 KB
Binary file not shown.
10 Bytes
Binary file not shown.
12 Bytes
Binary file not shown.

Files/client_file_vision.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"type": "service_account",
3+
"project_id": "notes2text",
4+
"private_key_id": "2fed7b9ef897663b69d3d0acf6194e99461a3506",
5+
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQChDBQSYHpS0o6n\nhnnp6exuAQgu4tL+IDlGCXBySbfXrwcbuItmzFNwCDe49yBDa1TjeS0ewBo3bb5C\nJR3ITd4jzRqKl1FbGEMrIteeH61nDM6jxzGGyRmTG1hfi56kidGcwsnaODEPDxCs\nfrXE/m7Ep8MvYCIoiK+daa52lihjZN4fDx5TARQzULC3pLxprTWgqCh/MkylkNFl\n0Zac/P2+4Z+o7fMH5TDlMupuUZEmim6tqee/Rey2qKKAWrvp4crdKLtGIUVaw3Ii\nefz/9fvYLO0YTGMIobn2RxnpHVOE9h8+NUebdY+tVrs/K3uPaltF9QV30MEZKce1\n1xu1C/ffAgMBAAECggEAPskEsj1msw9fijRFJ0qJYEGKvSy08VoCuilH62aOA8o6\nzTXfMw6JoO2zO6ssqtwXVi+RxaFDF0Tznbn2qHajGfYGWO2xTABjf1ewE/dplPlL\nPlKUHmYcUFui0m0ORjtsut697xNlHOfUqksj9k7zRDvbPocOChtvy8C9wYG88TJt\nMzd5Id6SMZmbkn3mQjMhE8znbEfanBuKdBpDZZQy+LYBwI01/qg+mfLM6qcb8BHg\nrKauDfn5aWnniHTW3b1PCi+A+qCR2QEoSCWlNTZS1ApO76zcoRhf4mISTE3f6YPH\nLLnO9HeqVxl9vzkD6if3tIqfdQwQeam3T5Rl5ULP0QKBgQDY9VADgaw1oqWJ9y83\nuK5uXVf/u9gpAg7tPqwG9oIIXLMaqS0rhiZDkHEd/A7DX5Fm8QNFMVeTzQJRx93M\nHXbHkeTXO7nBlalgo8csweS6NamGWKrFzFuI9Pol898wdGo/j4DWRXruqNaG7QGV\n1bZO+X6/XnSrAnGpfCE27olqCwKBgQC+BxeS9XwvgGD0aZg9Fy5RhhBNYf06lXp9\nvIgEuHWfYu7zY5kyURagMtEUbrGYJAQvP7F9NwVg39ZcFEiZ7MytArKyD8ylwrUE\nowKYgUiLQUWhhgVTboW0p5X4AIsGZQ32AEEZwxX+jXhSDUgQnhINk3o4t1pjQx/A\nMFXKjDBh/QKBgQCF02OEljMMboQfwwosG830o71JMKxYNz3P142zSdDRqaSMuHf1\nVe06VdmAfl744nVFAztaLeJwd8+LdHZ6ll4TIgn/PYcKQu/UfZG5KOyWc81a4kyM\nb86yfzHhwTkVoWl8RV9aTEdU+W5CkeUdcFLhCgLFtlTOu1YzDi9Ku/6lGwKBgQCX\nXLrQWi2DALU5ueqDrrU8RpcxrD2yrmIVSVjmua8nN/yxbcIXLDPjgta7ebiIQRf8\nMrXD0VG9ak4qphV2VgJqRN3lq2QNHNxnRxjINSJ9mY5HLKONY1mpHlv8YSJcA55t\nNqKkTJp2WmVepimi4uXqXnBaQOIEcvEVeuq7HZNkWQKBgCOBEEx058m9Duq8EgQ3\nH1taXkaUmSxlndI2VaLp00tqyfk0Yr9NPytwUlf0M2DkJbWrjjIyResPtZKWaM34\npTS08j90A+o65I+I2caHtsnPnVPxJ6/d0YrEbKUgMvC+et3mhvGpflugvWy9YX7v\nAbdXeljWTGv1qc8rIZ4KS5vr\n-----END PRIVATE KEY-----\n",
6+
"client_email": "[email protected]",
7+
"client_id": "105405504203772246599",
8+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
9+
"token_uri": "https://oauth2.googleapis.com/token",
10+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/image2text%40notes2text.iam.gserviceaccount.com",
12+
"universe_domain": "googleapis.com"
13+
}

Local_Storage/notes_txt/module3.txt

Whitespace-only changes.

0 commit comments

Comments
 (0)