Skip to content

Commit a9229ea

Browse files
committed
notestotext
1 parent 2c0ab9f commit a9229ea

File tree

18 files changed

+633
-0
lines changed

18 files changed

+633
-0
lines changed

Backend/NotesToText.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,23 @@
1717

1818
router = APIRouter()
1919

20+
def download_files_from_s3(bucket_name, prefix, local_directory):
    """Download every object under an S3 prefix into one local directory.

    The bucket is listed through the ``list_objects_v2`` paginator, so
    prefixes that span more than one listing page are fully processed. Each
    object is saved as ``local_directory/<basename of key>``.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        prefix: Key prefix used to filter the listing (e.g. ``'notes_pdf/'``).
        local_directory: Directory that receives the downloaded files. It is
            created if it does not already exist.

    Note:
        Only the final path component of each key is kept, so keys in
        different sub-prefixes that share a file name overwrite one another
        locally.
    """
    # NOTE(review): `s3` is not defined in this block; presumably a boto3 S3
    # client created at module level -- confirm against the file header.

    # Ensure the target directory exists before anything is written into it.
    os.makedirs(local_directory, exist_ok=True)

    paginator = s3.get_paginator('list_objects_v2')
    page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)

    for page in page_iterator:
        # A page without matching keys carries no 'Contents' entry.
        for item in page.get('Contents', []):
            key = item['Key']
            file_name = os.path.basename(key)
            if not file_name:
                # Keys ending in '/' (folder placeholder objects) have an
                # empty basename; joining it would yield the directory path
                # itself and the download would fail, so skip them.
                continue
            local_file_path = os.path.join(local_directory, file_name)
            s3.download_file(bucket_name, key, local_file_path)
            print(f"Downloaded {key} to {local_file_path}")
34+
35+
36+
2037
def pdf_to_images(pdf_path, output_folder):
2138

2239
# Convert PDF pages to images
@@ -39,6 +56,15 @@ def pdf_to_images(pdf_path, output_folder):
3956
def NotesToText_handler():
4057
substring_to_remove = "Scanned by CamScanner"
4158

59+
prefix = 'notes_pdf/'
60+
local_directory = 'Local_Storage/notes_pdf'
61+
62+
# Create the local directory if it doesn't exist
63+
os.makedirs(local_directory, exist_ok=True)
64+
65+
# Download files from S3
66+
download_files_from_s3(s3_bucket_name, prefix, local_directory)
67+
4268
folder_path = "Local_Storage/notes_pdf"
4369

4470
# Get all files in the folder
@@ -78,6 +104,15 @@ def NotesToText_handler():
78104
with open(output_file, "w",encoding="utf-8") as file:
79105
file.write(image_contents)
80106
print(f"{file_name} completed")
107+
108+
s3_key = f'notes_txt/{file_name}.txt'
109+
110+
# Write the text content to the output file
111+
s3.put_object(
112+
Body=image_contents,
113+
Bucket=s3_bucket_name,
114+
Key=s3_key
115+
)
81116

82117
if response.error.message:
83118
raise Exception(
733 Bytes
Binary file not shown.
-741 KB
Binary file not shown.
17.1 KB
Binary file not shown.

Local_Storage/notes_pdf/SEM3.pdf

198 KB
Binary file not shown.
-114 KB
Binary file not shown.
-299 KB
Binary file not shown.
-4.72 MB
Binary file not shown.
-1.52 MB
Binary file not shown.
-3.92 MB
Binary file not shown.

0 commit comments

Comments
 (0)