@@ -5385,16 +5385,21 @@ def select(self, pyliste):
53855385 raise ValueError("is no PDF")
53865386 if not hasattr(pyliste, "__getitem__"):
53875387 raise ValueError("sequence required")
5388- if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not in range(len(self)):
5388+
5389+ valid_range = range(len(self))
5390+ if (len(pyliste) == 0
5391+ or min(pyliste) not in valid_range
5392+ or max(pyliste) not in valid_range
5393+ ):
53895394 raise ValueError("bad page number(s)")
5390- # preparatory stuff:
5391- # (1) get underlying pdf document,
5392- # (2) transform Python list into integer array
5395+
5396+ # get underlying pdf document,
53935397 pdf = _as_pdf_document(self)
5394- # call retainpages (code copy of fz_clean_file.c)
5395- retainpages(pdf, pyliste)
5396- if pdf.m_internal.rev_page_map:
5397- mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
5398+
5399+ # create page sub-pdf via extra.rearrange_pages2
5400+ extra.rearrange_pages2(pdf, tuple(pyliste))
5401+
5402+ # remove any existing pages with their kids
53985403 self._reset_page_refs()
53995404
54005405 def set_language(self, language=None):
@@ -20862,116 +20867,6 @@ def repair_mono_font(page: "Page", font: "Font") -> None:
2086220867 log("Cannot set width for '%s' in xref %i" % (font.name, xref))
2086320868
2086420869
def retainpage(doc, parent, kids, page):
    '''
    Attach one retained page to the page tree that is being rebuilt.

    The page object for page number `page` is looked up in `doc`, its
    inheritable page items are flattened, its /Parent entry is set to
    `parent`, and the object is appended to the `kids` array.
    '''
    page_obj = mupdf.pdf_lookup_page_obj(doc, page)
    # Flatten inheritable items before the /Parent entry is replaced below.
    mupdf.pdf_flatten_inheritable_page_items(page_obj)
    mupdf.pdf_dict_put(page_obj, PDF_NAME('Parent'), parent)
    # The page becomes the next entry of the new kids array.
    mupdf.pdf_array_push(kids, page_obj)
20874-
20875-
def retainpages(doc, liste):
    '''
    Rebuild the page tree of `doc` so that only the pages in `liste` remain.

    This is called by PyMuPDF.

    Args:
        doc: the PDF document, as accepted by the `mupdf.pdf_*` functions.
        liste: page numbers to retain; must support `len()` and integer
            indexing. Each number must satisfy 0 <= n < page count. Pages
            are pushed to the new kids array in the order given.

    Effects on `doc`:
        * The catalog is replaced by a reduced dictionary that keeps only
          /Type, /Pages and, if present, /Outlines and /OCProperties.
        * /Count and /Kids of the pages object are rewritten.
        * If an old Dests name tree was loaded, a new /Names /Dests tree is
          built from the entries accepted by `dest_is_valid_page`.
        * Link annotations rejected by `dest_is_valid` are deleted from
          each page's /Annots array.
        * /Outlines is dropped from the catalog if `strip_outlines`
          returns 0.

    A bad page number is reported through
    `RAISEPY(MSG_BAD_PAGENO, PyExc_ValueError)` (presumably raising; see
    RAISEPY). Pages preceding the bad entry have already been re-parented
    at that point.
    '''
    pagecount = mupdf.pdf_count_pages(doc)

    # Keep only pages/type and (reduced) dest entries to avoid
    # references to dropped pages
    oldroot = mupdf.pdf_dict_get(mupdf.pdf_trailer(doc), PDF_NAME('Root'))
    pages = mupdf.pdf_dict_get(oldroot, PDF_NAME('Pages'))
    olddests = mupdf.pdf_load_name_tree(doc, PDF_NAME('Dests'))
    outlines = mupdf.pdf_dict_get(oldroot, PDF_NAME('Outlines'))
    ocproperties = mupdf.pdf_dict_get(oldroot, PDF_NAME('OCProperties'))
    names_list = None

    root = mupdf.pdf_new_dict(doc, 3)
    mupdf.pdf_dict_put(root, PDF_NAME('Type'), mupdf.pdf_dict_get(oldroot, PDF_NAME('Type')))
    mupdf.pdf_dict_put(root, PDF_NAME('Pages'), mupdf.pdf_dict_get(oldroot, PDF_NAME('Pages')))
    if outlines.m_internal:
        mupdf.pdf_dict_put(root, PDF_NAME('Outlines'), outlines)
    if ocproperties.m_internal:
        mupdf.pdf_dict_put(root, PDF_NAME('OCProperties'), ocproperties)

    # Install the reduced catalog under the old catalog's object number.
    mupdf.pdf_update_object(doc, mupdf.pdf_to_num(oldroot), root)

    # Create a new kids array with only the pages we want to keep
    kids = mupdf.pdf_new_array(doc, 1)

    # Retain pages specified
    for idx in range(len(liste)):
        pno = liste[idx]
        if pno < 0 or pno >= pagecount:
            RAISEPY(MSG_BAD_PAGENO, PyExc_ValueError)
        retainpage(doc, pages, kids, pno)

    # Update page count and kids array
    countobj = mupdf.pdf_new_int(mupdf.pdf_array_len(kids))
    mupdf.pdf_dict_put(pages, PDF_NAME('Count'), countobj)
    mupdf.pdf_dict_put(pages, PDF_NAME('Kids'), kids)

    # Re-read the page count and collect the object number of each page.
    pagecount = mupdf.pdf_count_pages(doc)
    page_object_nums = []
    for i in range(pagecount):
        pageref = mupdf.pdf_lookup_page_obj(doc, i)
        page_object_nums.append(mupdf.pdf_to_num(pageref))

    # If we had an old Dests tree (now reformed as an olddests dictionary),
    # keep any entries in there that point to valid pages.
    # This may mean we keep more than we need, but it is safe at least.
    if olddests:
        names = mupdf.pdf_new_dict(doc, 1)
        dests = mupdf.pdf_new_dict(doc, 1)
        len_ = mupdf.pdf_dict_len(olddests)

        names_list = mupdf.pdf_new_array(doc, 32)

        for i in range(len_):
            key = mupdf.pdf_dict_get_key(olddests, i)
            val = mupdf.pdf_dict_get_val(olddests, i)
            dest = mupdf.pdf_dict_get(val, PDF_NAME('D'))

            # Use the /D entry if there is one, else the value itself;
            # element 0 of that array is what gets validated.
            dest = mupdf.pdf_array_get(dest if dest.m_internal else val, 0)
            # FIXME: need dest_is_valid_page.
            if dest_is_valid_page(dest, page_object_nums, pagecount):
                key_name = mupdf.pdf_to_name(key)
                key_str = mupdf.pdf_new_string(key_name, len(key_name))
                mupdf.pdf_array_push(names_list, key_str)
                mupdf.pdf_array_push(names_list, val)

        mupdf.pdf_dict_put(dests, PDF_NAME('Names'), names_list)
        mupdf.pdf_dict_put(names, PDF_NAME('Dests'), dests)
        mupdf.pdf_dict_put(root, PDF_NAME('Names'), names)

    # Edit each page's /Annots list to remove any links pointing to nowhere.
    for i in range(pagecount):
        pageref = mupdf.pdf_lookup_page_obj(doc, i)
        annots = mupdf.pdf_dict_get(pageref, PDF_NAME('Annots'))
        len_ = mupdf.pdf_array_len(annots)
        j = 0
        while j < len_:
            o = mupdf.pdf_array_get(annots, j)
            is_link = mupdf.pdf_name_eq(
                    mupdf.pdf_dict_get(o, PDF_NAME('Subtype')),
                    PDF_NAME('Link'),
                    )
            if is_link and not dest_is_valid(o, pagecount, page_object_nums, names_list):
                # Remove this annotation. The following entries shift down
                # by one, so the same index is examined again.
                mupdf.pdf_array_delete(annots, j)
                len_ -= 1
            else:
                # Non-link annotations and valid links are kept. The index
                # must advance here too, otherwise a non-link annotation
                # would make this loop spin forever.
                j += 1

    if strip_outlines( doc, outlines, pagecount, page_object_nums, names_list) == 0:
        mupdf.pdf_dict_del(root, PDF_NAME('Outlines'))
20973-
20974-
2097520870def sRGB_to_pdf(srgb: int) -> tuple:
2097620871 """Convert sRGB color code to a PDF color triple.
2097720872
0 commit comments