@@ -2588,11 +2588,9 @@ def extract_text(genparams):
25882588 docData = genparams.get("docData", "")
25892589 if docData.startswith("data:text"):
25902590 docData = docData.split(",", 1)[1]
2591-
2592- # elif docData.startswith("data:application/pdf"):
2593- # docData = docData.split(",", 1)[1]
2594- # return extract_text_from_pdf(docData)
2595-
2591+ elif docData.startswith("data:application/pdf"):
2592+ docData = docData.split(",", 1)[1]
2593+ return extract_text_from_pdf(docData)
25962594 elif docData.startswith("data:audio"):
25972595 genparams["audio_data"] = docData
25982596 return whisper_generate(genparams)
@@ -2646,15 +2644,15 @@ def extract_text(genparams):
26462644
26472645# # PDF extraction code by sevenof9
26482646# def getTextFromPDFEncapsulated(decoded_bytes):
2649- # # import pdfplumber
2647+ # import pdfplumber
26502648
26512649 # """
26522650 # Processes a page based on the page number, content and text settings being passed in.
26532651 # Returns the page number and the text content
26542652 # """
26552653 # def process_page(args):
26562654 # import json
2657- # # from pdfplumber.utils import get_bbox_overlap, obj_to_bbox
2655+ # from pdfplumber.utils import get_bbox_overlap, obj_to_bbox
26582656
26592657 # # Ensure logging is only at error level (as this could be running in multiple threads)
26602658 # for logger_name in [
@@ -2790,9 +2788,9 @@ def extract_text(genparams):
27902788 # # with ThreadPoolExecutor(max_workers=num_cores) as exe:
27912789 # # return exe.map(process_page, pages)
27922790
2793- # # decoded_bytes = io.BytesIO(decoded_bytes)
2794- # # with pdfplumber.open(decoded_bytes) as pdf:
2795- # # num_pages = len(pdf.pages)
2791+ # decoded_bytes = io.BytesIO(decoded_bytes)
2792+ # with pdfplumber.open(decoded_bytes) as pdf:
2793+ # num_pages = len(pdf.pages)
27962794
27972795 # TEXT_EXTRACTION_SETTINGS = {
27982796 # "x_tolerance": 2,
@@ -2825,7 +2823,7 @@ def extract_text(genparams):
28252823# def getJsonFromPDFEncapsulatedPyMuPdf(decoded_bytes):
28262824 # from tqdm.auto import tqdm
28272825 # import fitz
2828- # # import io
2826+ # import io
28292827 # from concurrent.futures import ThreadPoolExecutor
28302828 # import json
28312829 # import re
@@ -3205,10 +3203,8 @@ def tts_prepare_voice_json(jsonstr):
32053203 codestr = ""
32063204 for c in codes:
32073205 codestr += f"<|{c}|>"
3208-
32093206 # processed += f"{word}<|t_{duration:.2f}|><|code_start|>{codestr}<|code_end|>\n"
32103207 # return {"phrase":txt.strip()+".","voice":processed.strip()}
3211-
32123208 processed += f"{word}<t_{duration:.2f}><|code_start|>{codestr}<|code_end|>\n"
32133209 return {"phrase":txt,"voice":processed}
32143210
0 commit comments