|
| 1 | +import os |
| 2 | +from fastapi import APIRouter |
| 3 | +from pdf2image import convert_from_path |
| 4 | +from google.cloud import vision |
| 5 | + |
| 6 | +# Router for this module's endpoints; the GET /NotesToText route is registered on it. |
| 7 | +router = APIRouter() |
| 8 | + |
def pdf_to_images(pdf_path, output_folder):
    """Convert every page of a PDF to a JPEG file.

    Pages are written to ``output_folder`` as ``page_1.jpeg``,
    ``page_2.jpeg``, ... in page order.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Directory that receives the page images. It is
            created (including missing parents) when it does not exist.

    Returns:
        A tuple ``(image_paths, page_count)``: the list of written image
        paths in page order, and how many pages were written. A PDF that
        yields no pages gives ``([], 0)``.
    """
    # Convert PDF pages to images
    images = convert_from_path(pdf_path)

    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same folder.
    os.makedirs(output_folder, exist_ok=True)

    # Save each image in the specified output folder
    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(output_folder, f'page_{page_number}.jpeg')
        image.save(image_path, 'JPEG')
        image_paths.append(image_path)

    # Derive the count from the list instead of the loop variable, which is
    # unbound when the PDF produced no pages.
    return image_paths, len(image_paths)
| 26 | + |
def _detect_page_text(client, image_path):
    """Return the text Vision detects in one image file ('' when none).

    Raises:
        Exception: when the Vision response carries an error message.
    """
    with open(image_path, 'rb') as image_file:
        content = image_file.read()
    response = client.text_detection(image=vision.Image(content=content))

    # Check for an API error before touching the annotations, so a failed
    # request is reported as such instead of as an IndexError.
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    annotations = response.text_annotations
    if not annotations:
        # No text detected on this page (e.g. a blank scan).
        return ""
    # The first annotation carries the text of the whole image (see the
    # Vision text-detection documentation).
    return str(annotations[0].description)


@router.get("/NotesToText")
def NotesToText_handler():
    """Convert the note PDFs of modules 1-4 to plain-text files via OCR.

    For each module the PDF ``Local_Storage/notes_pdf/module_<n>.pdf`` is
    rendered to JPEG pages under ``images/Notes_images/module_<n>``, each
    page is sent to Google Cloud Vision text detection, the
    "Scanned by CamScanner" watermark text is removed, and the combined
    text is written to ``Local_Storage/notes_txt/module_<n>.txt``.

    Raises:
        Exception: when the Vision API reports an error for a page. The
            text file of the module being processed is not written then.
    """
    substring_to_remove = "Scanned by CamScanner"

    # Credentials and client do not depend on the module: set up once.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'Files/client_file_vision.json'
    client = vision.ImageAnnotatorClient()

    for module_number in range(1, 5):
        print(f"converting module-{module_number}....")
        pdf_path = f'Local_Storage/notes_pdf/module_{module_number}.pdf'
        output_folder = f'images/Notes_images/module_{module_number}'

        # Convert the PDF to images and save them in the output folder
        image_paths, noImg = pdf_to_images(pdf_path, output_folder)
        print(noImg)

        # Read back exactly the files that were written. Rebuilding the
        # path by hand used "Module_<n>" while the images live in
        # "module_<n>", which fails on case-sensitive filesystems.
        page_texts = [
            _detect_page_text(client, image_path).replace(
                substring_to_remove, "")
            for image_path in image_paths
        ]
        # The leading space is kept so the output matches earlier runs.
        image_contents = " " + "".join(page_texts)

        output_file = f"Local_Storage/notes_txt/module_{module_number}.txt"
        # Write the text content to the output file. Explicit UTF-8 so
        # non-ASCII OCR output does not depend on the platform's locale.
        with open(output_file, "w", encoding="utf-8") as file:
            file.write(image_contents)
        print(f"module-{module_number} completed")
| 67 | + |
| 68 | + |
| 69 | + |
0 commit comments