@@ -4196,8 +4196,7 @@ def extract_image(self, xref):
41964196 raise ValueError("document closed or encrypted")
41974197
41984198 pdf = _as_pdf_document(self)
4199- img_type = 0
4200- smask = 0
4199+
42014200 if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
42024201 raise ValueError( MSG_BAD_XREF)
42034202
@@ -4210,65 +4209,15 @@ def extract_image(self, xref):
42104209 o = mupdf.pdf_dict_geta(obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
42114210 if o.m_internal:
42124211 smask = mupdf.pdf_to_num(o)
4213-
4214- if mupdf.pdf_is_jpx_image(obj):
4215- img_type = mupdf.FZ_IMAGE_JPX
4216- res = mupdf.pdf_load_stream(obj)
4217- ext = "jpx"
4218- if JM_is_jbig2_image(obj):
4219- img_type = mupdf.FZ_IMAGE_JBIG2
4220- res = mupdf.pdf_load_stream(obj)
4221- ext = "jb2"
4222- res = mupdf.pdf_load_raw_stream(obj)
4223- if img_type == mupdf.FZ_IMAGE_UNKNOWN:
4224- res = mupdf.pdf_load_raw_stream(obj)
4225- _, c = mupdf.fz_buffer_storage(res)
4226- #log( '{=_ c}')
4227- img_type = mupdf.fz_recognize_image_format(c)
4228- ext = JM_image_extension(img_type)
4229- if img_type == mupdf.FZ_IMAGE_UNKNOWN:
4230- res = None
4231- img = mupdf.pdf_load_image(pdf, obj)
4232- ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)
4233- if (ll_cbuf
4234- and ll_cbuf.params.type not in (
4235- mupdf.FZ_IMAGE_RAW,
4236- mupdf.FZ_IMAGE_FAX,
4237- mupdf.FZ_IMAGE_FLATE,
4238- mupdf.FZ_IMAGE_LZW,
4239- mupdf.FZ_IMAGE_RLD,
4240- )
4241- ):
4242- img_type = ll_cbuf.params.type
4243- ext = JM_image_extension(img_type)
4244- res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))
4245- else:
4246- res = mupdf.fz_new_buffer_from_image_as_png(
4247- img,
4248- mupdf.FzColorParams(mupdf.fz_default_color_params),
4249- )
4250- ext = "png"
42514212 else:
4252- img = mupdf.fz_new_image_from_buffer(res)
4253-
4254- xres, yres = mupdf.fz_image_resolution(img)
4255- width = img.w()
4256- height = img.h()
4257- colorspace = img.n()
4258- bpc = img.bpc()
4259- cs_name = mupdf.fz_colorspace_name(img.colorspace())
4213+ smask = 0
42604214
4215+ # load the image
4216+ img = mupdf.pdf_load_image(pdf, obj)
42614217 rc = dict()
4262- rc[ dictkey_ext] = ext
4263- rc[ dictkey_smask] = smask
4264- rc[ dictkey_width] = width
4265- rc[ dictkey_height] = height
4266- rc[ dictkey_colorspace] = colorspace
4267- rc[ dictkey_bpc] = bpc
4268- rc[ dictkey_xres] = xres
4269- rc[ dictkey_yres] = yres
4270- rc[ dictkey_cs_name] = cs_name
4271- rc[ dictkey_image] = JM_BinFromBuffer(res)
4218+ make_image_dict(img, rc)
4219+ rc[dictkey_smask] = smask
4220+ rc[dictkey_cs_name] = mupdf.fz_colorspace_name(img.colorspace())
42724221 return rc
42734222
42744223 def ez_save(
@@ -16313,19 +16262,6 @@ def JM_irect_from_py(r):
1631316262 f[i] = FZ_MAX_INF_RECT
1631416263 return mupdf.fz_make_irect(f[0], f[1], f[2], f[3])
1631516264
16316-
16317- def JM_is_jbig2_image(dict_):
16318- # fixme: should we remove this function?
16319- return 0
16320- #filter_ = pdf_dict_get(ctx, dict_, PDF_NAME(Filter));
16321- #if (pdf_name_eq(ctx, filter_, PDF_NAME(JBIG2Decode)))
16322- # return 1;
16323- #n = pdf_array_len(ctx, filter_);
16324- #for (i = 0; i < n; i++)
16325- # if (pdf_name_eq(ctx, pdf_array_get(ctx, filter_, i), PDF_NAME(JBIG2Decode)))
16326- # return 1;
16327- #return 0;
16328-
1632916265def JM_listbox_value( annot):
1633016266 '''
1633116267 ListBox retrieve value
@@ -16500,38 +16436,52 @@ def __str__(self):
1650016436 line_dict[dictkey_spans] = span_list
1650116437 return line_rect
1650216438
def make_image_dict(img, img_dict):
    """Fill 'img_dict' with the properties and binary content of 'img'.

    Shared by 'Document.extract_image' and 'JM_make_image_block'; each of
    those callers adds its own extra entries afterwards.

    The image bytes stored under 'dictkey_image' come from one of three
    sources:

    * the image's compressed buffer, unchanged, when such a buffer exists
      and its type is neither unknown nor JBIG2 and does not rank below
      'FZ_IMAGE_BMP';
    * a fresh JPEG encoding (quality 95, color inversion requested) when
      that buffer is a JPEG and the image has 4 components (CMYK);
    * a PNG rendering of the image in every other case, in which case the
      extension is forced to "png".

    Args:
        img: the image wrapper object to describe.
        img_dict: dictionary that is updated in place.

    Returns:
        None.
    """
    kind = img.fz_compressed_image_type()
    ext = JM_image_extension(kind)

    # low-level compressed buffer of the image; falsy if there is none
    cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)

    if (cbuf
            and kind not in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
            and kind >= mupdf.FZ_IMAGE_BMP):
        if ext == "jpeg" and img.n() == 4:
            # CMYK JPEG: re-encode with inverted colors instead of
            # handing out the stored stream
            buf = mupdf.fz_new_buffer_from_image_as_jpeg(
                img,
                mupdf.FzColorParams(mupdf.fz_default_color_params),
                95,
                1,
            )
        else:
            # the stored compressed data is directly usable: share it
            buf = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(cbuf.buffer))
    else:
        # no usable compressed data: fall back to a PNG rendering
        buf = mupdf.fz_new_buffer_from_image_as_png(
            img,
            mupdf.FzColorParams(mupdf.fz_default_color_params),
        )
        ext = "png"

    data = JM_BinFromBuffer(buf)

    # key order is kept stable because callers may rely on dict ordering
    img_dict[dictkey_width] = img.w()
    img_dict[dictkey_height] = img.h()
    img_dict[dictkey_ext] = ext
    img_dict[dictkey_colorspace] = img.n()
    img_dict[dictkey_xres] = img.xres()
    img_dict[dictkey_yres] = img.yres()
    img_dict[dictkey_bpc] = img.bpc()
    img_dict[dictkey_size] = len(data)
    img_dict[dictkey_image] = data
16480+
def JM_make_image_block(block, block_dict):
    """Describe the image of an image block in 'block_dict'.

    The generic image entries are filled in by 'make_image_dict'; this
    function then adds the block's transformation matrix under
    'dictkey_matrix'.

    Args:
        block: block object providing 'i_image()' and 'i_transform()'.
        block_dict: dictionary that is updated in place.
    """
    make_image_dict(block.i_image(), block_dict)
    transform = block.i_transform()
    block_dict[dictkey_matrix] = JM_py_from_matrix(transform)
1653516485
1653616486
1653716487def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
0 commit comments