Skip to content

Commit 3bed5ee

Browse files
committed
realign on esobold
1 parent a1fd9d5 commit 3bed5ee

File tree

1 file changed

+9
-13
lines changed

1 file changed

+9
-13
lines changed

koboldcpp.py

Lines changed: 9 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2588,11 +2588,9 @@ def extract_text(genparams):
25882588
docData = genparams.get("docData", "")
25892589
if docData.startswith("data:text"):
25902590
docData = docData.split(",", 1)[1]
2591-
2592-
# elif docData.startswith("data:application/pdf"):
2593-
# docData = docData.split(",", 1)[1]
2594-
# return extract_text_from_pdf(docData)
2595-
2591+
elif docData.startswith("data:application/pdf"):
2592+
docData = docData.split(",", 1)[1]
2593+
return extract_text_from_pdf(docData)
25962594
elif docData.startswith("data:audio"):
25972595
genparams["audio_data"] = docData
25982596
return whisper_generate(genparams)
@@ -2646,15 +2644,15 @@ def extract_text(genparams):
26462644

26472645
# # PDF extraction code by sevenof9
26482646
# def getTextFromPDFEncapsulated(decoded_bytes):
2649-
# # import pdfplumber
2647+
# import pdfplumber
26502648

26512649
# """
26522650
# Processes a page based on the page number, content and text settings being passed in.
26532651
# Returns the page number and the text content
26542652
# """
26552653
# def process_page(args):
26562654
# import json
2657-
# # from pdfplumber.utils import get_bbox_overlap, obj_to_bbox
2655+
# from pdfplumber.utils import get_bbox_overlap, obj_to_bbox
26582656

26592657
# # Ensure logging is only at error level (as this could be running in multiple threads)
26602658
# for logger_name in [
@@ -2790,9 +2788,9 @@ def extract_text(genparams):
27902788
# # with ThreadPoolExecutor(max_workers=num_cores) as exe:
27912789
# # return exe.map(process_page, pages)
27922790

2793-
# # decoded_bytes = io.BytesIO(decoded_bytes)
2794-
# # with pdfplumber.open(decoded_bytes) as pdf:
2795-
# # num_pages = len(pdf.pages)
2791+
# decoded_bytes = io.BytesIO(decoded_bytes)
2792+
# with pdfplumber.open(decoded_bytes) as pdf:
2793+
# num_pages = len(pdf.pages)
27962794

27972795
# TEXT_EXTRACTION_SETTINGS = {
27982796
# "x_tolerance": 2,
@@ -2825,7 +2823,7 @@ def extract_text(genparams):
28252823
# def getJsonFromPDFEncapsulatedPyMuPdf(decoded_bytes):
28262824
# from tqdm.auto import tqdm
28272825
# import fitz
2828-
# # import io
2826+
# import io
28292827
# from concurrent.futures import ThreadPoolExecutor
28302828
# import json
28312829
# import re
@@ -3205,10 +3203,8 @@ def tts_prepare_voice_json(jsonstr):
32053203
codestr = ""
32063204
for c in codes:
32073205
codestr += f"<|{c}|>"
3208-
32093206
# processed += f"{word}<|t_{duration:.2f}|><|code_start|>{codestr}<|code_end|>\n"
32103207
# return {"phrase":txt.strip()+".","voice":processed.strip()}
3211-
32123208
processed += f"{word}<t_{duration:.2f}><|code_start|>{codestr}<|code_end|>\n"
32133209
return {"phrase":txt,"voice":processed}
32143210

0 commit comments

Comments
 (0)