@@ -20,4 +20,47 @@ def pdf_to_images(pdf_path, output_folder):
20
20
image_paths .append (image_path )
21
21
noImg = i + 1
22
22
23
- return image_paths ,noImg
23
+ return image_paths ,noImg
24
+
25
+
26
+
27
+
28
# OCR driver script: renders each module PDF to page images, runs Google
# Cloud Vision text detection on every page, and writes the combined text of
# a module to a single .txt file.

# Text stripped from every page's OCR result (presumably the watermark that
# the CamScanner app stamps on scanned pages -- confirm against real input).
substring_to_remove = "Scanned by CamScanner"

# Point the Google client library at the service-account key file.
# os.path.join is used instead of a hard-coded backslash: '\C' is an invalid
# escape sequence in a normal string literal and the backslash separator only
# works on Windows.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(
    'Files', 'Client_file_vision.json')

# The client does not depend on the module being processed, so it is created
# once rather than once per loop iteration.
client = vision.ImageAnnotatorClient()

for i in range(1):
    pdf_path = f'Local_Storage/notes_pdf/module_{i + 1}.pdf'
    output_folder = 'images/Notes_images'

    # Convert the PDF to images and save them in the output folder.
    # pdf_to_images returns the list of image paths and the page count.
    image_paths, noImg = pdf_to_images(pdf_path, output_folder)

    # [START vision_python_migration_text_detection]
    page_texts = []

    for j in range(noImg):
        # NOTE(review): assumes pdf_to_images stores pages under
        # Module_<n>/page_<m>.jpeg inside output_folder -- confirm.
        image_path = f'images/Notes_images/Module_{i + 1}/page_{j + 1}.jpeg'
        with open(image_path, 'rb') as image_file:
            content = image_file.read()

        image = vision.Image(content=content)
        response = client.text_detection(image=image)

        # Check the API error for *every* page, and before touching the
        # annotations: a failed request carries no annotations, so indexing
        # first would hide the real error behind an IndexError.
        if response.error.message:
            raise Exception(
                '{}\n For more info on error messages, check: '
                'https://cloud.google.com/apis/design/errors'.format(
                    response.error.message))

        # A page without any detected text yields no annotations; it simply
        # contributes nothing to the module's text.
        if not response.text_annotations:
            continue

        # The first annotation is the one whose description is used as the
        # page's text.
        text = str(response.text_annotations[0].description)
        page_texts.append(text.replace(substring_to_remove, ""))

    # The leading single space is kept for compatibility with the files this
    # script produced previously.
    image_contents = " " + "".join(page_texts)

    output_file = f"Local_Storage/notes_txt/module{i + 1}.txt"

    # Write the text content to the output file. UTF-8 is requested
    # explicitly so OCR output containing non-ASCII characters cannot fail
    # on platforms whose default encoding is narrower.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(image_contents)
64
+
65
+
66
+
0 commit comments