Replies: 3 comments 19 replies
-
This is more a weakness of Adobe's viewer than of the font: other viewers, e.g. Foxit, display it better. The font does have a weird … But if you use this as flags: |
Beta Was this translation helpful? Give feedback.
-
I just want to use Acrobat to convert the PDF to PPTX, but the original PDF fails to export to a good PPTX. |
Beta Was this translation helpful? Give feedback.
-
1output.pdf import fitz # PyMuPDF
import sys
import os
INPUT_PDF = r"L:\1.pdf"
OUTPUT_PDF = r"F:\1output.pdf"
FONT_PATH = r"C:\Windows\Fonts\simhei.ttf" # Make sure this font file exists in your working directory, or use an absolute path
def redact_all_text(page, dict_out, fill=(1, 1, 1)):
    """Mark every text span on ``page`` for redaction.

    Only redaction *annotations* are added here; nothing is removed until
    the caller runs ``page.apply_redactions(...)``.

    Args:
        page: Page object whose ``add_redact_annot`` method is called once
            per span.
        dict_out: Result of ``page.get_text("dict")``. Only its ``"blocks"``
            list is read.
        fill: Fill colour forwarded to ``add_redact_annot``. Defaults to
            white ``(1, 1, 1)``, the value that used to be hard-coded.
            According to the PyMuPDF documentation, ``False`` suppresses the
            fill so the redacted area stays transparent.
    """
    for block in dict_out["blocks"]:
        # Blocks whose type is not 0 are skipped; only type-0 blocks are
        # expected to carry the "lines"/"spans" structure read below.
        if block["type"] != 0:
            continue
        for line in block["lines"]:
            for span in line["spans"]:
                # One redaction rectangle per span bounding box.
                page.add_redact_annot(fitz.Rect(span["bbox"]), fill=fill)
def write_spans(npage, blocks, fontname, fontsize_add=0):
    """Re-draw every character of ``blocks`` on ``npage`` with the CJK font.

    Each character is written at its original origin and rotated according
    to the writing direction of its line. After the characters of a span
    are written, the span's bounding box is outlined in blue.

    Args:
        npage: Target page that receives the text and the outlines.
        blocks: The ``"blocks"`` list of ``page.get_text("rawdict")``.
            Entries without a ``"lines"`` key are skipped.
        fontname: Accepted for backward compatibility but not used; the
            text is written through a ``fitz.Font`` object, not a font
            registered by name.
        fontsize_add: Value added to each span's font size before writing.
            The default ``0`` keeps the original size.
    """
    # Load the single font that is actually used, once. The previous code
    # also loaded "helv" and a hard-coded Arial Unicode file (twice each),
    # which were never used and made the function fail when that file was
    # missing.
    cjk = fitz.Font("cjk")

    for block in blocks:
        if "lines" not in block:
            continue
        for line in block["lines"]:
            # "dir" holds the (cos, sin) pair of the line's writing
            # direction; build the matching rotation matrix from it.
            cos, sin = line["dir"]
            matrix = fitz.Matrix(cos, -sin, sin, cos, 0, 0)
            for span in line["spans"]:
                fsize = span["size"] + fontsize_add
                for char in span["chars"]:
                    origin = fitz.Point(char["origin"])
                    # One writer per character: the morph is anchored at
                    # this character's own origin, so the rotation cannot
                    # be shared between characters.
                    writer = fitz.TextWriter(npage.rect)
                    writer.append(origin, char["c"], font=cjk, fontsize=fsize)
                    writer.write_text(npage, morph=(origin, matrix))

                # Outline the span's original bounding box in blue so the
                # rewritten glyphs can be compared with the original extent.
                shape = npage.new_shape()
                shape.draw_rect(span["bbox"])
                shape.finish(
                    fill=None,  # no fill colour
                    color=(0, 0, 1),  # blue border
                )
                shape.commit()
def main(input_pdf, output_pdf, font_path):
    """Rewrite the text of ``input_pdf`` and save the result.

    For every page: the text is extracted, all text spans are redacted
    (images are kept), the redacted page is placed on a fresh page of a new
    document, and the extracted text is written back with ``write_spans``.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path the rewritten PDF is saved to.
        font_path: Path of the font file registered on each source page
            under the name ``"HT"``. This argument used to be ignored in
            favour of a hard-coded path.
    """
    # Both documents are opened as context managers so they are closed even
    # if processing a page raises.
    with fitz.open(input_pdf) as doc, fitz.open() as new_doc:
        for i, page in enumerate(doc):
            page.insert_font(
                fontname="HT",
                fontfile=font_path,
                fontbuffer=None,
                set_simple=False,
            )

            # Extract the text BEFORE redacting: afterwards it is gone.
            dict_out = page.get_text("dict")
            blocks = page.get_text("rawdict")["blocks"]

            # Redact all text, keeping images untouched.
            redact_all_text(page, dict_out)
            page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE)

            # Place the redacted page on a new page of identical size.
            new_page = new_doc.new_page(
                width=page.rect.width, height=page.rect.height
            )
            new_page.show_pdf_page(page.rect, doc, i)

            # Write the previously extracted text back.
            write_spans(new_page, blocks, fontname="HT", fontsize_add=0)

        new_doc.save(output_pdf)
    print(f"Done. Output saved to {output_pdf}")
if __name__ == "__main__":
    # Usage: python rewrite_pdf_font.py input.pdf output.pdf
    import argparse

    # Both positional arguments are optional and fall back to the
    # module-level defaults.
    cli = argparse.ArgumentParser()
    cli.add_argument("input_pdf", nargs="?", default=INPUT_PDF)
    cli.add_argument("output_pdf", nargs="?", default=OUTPUT_PDF)
    cli.add_argument("--font", default=FONT_PATH, help="Path to TTF font file")
    options = cli.parse_args()

    # Files that must exist before any work starts, each paired with the
    # message printed when it is missing. Checked in this order.
    required_files = (
        (options.input_pdf, "Input PDF not found!"),
        (options.font, "Font file not found!"),
    )
    for path, complaint in required_files:
        if not os.path.exists(path):
            print(complaint)
            sys.exit(1)

    main(options.input_pdf, options.output_pdf, options.font)
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
1.pdf
Is it possible to rewrite such a PDF with a correct font, so that the text shown matches the text selected?

Only a thin line is selected now.
Beta Was this translation helpful? Give feedback.
All reactions