@@ -4196,8 +4196,7 @@ def extract_image(self, xref):
41964196 raise ValueError("document closed or encrypted")
41974197
41984198 pdf = _as_pdf_document(self)
4199- img_type = 0
4200- smask = 0
4199+
42014200 if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
42024201 raise ValueError( MSG_BAD_XREF)
42034202
@@ -4210,65 +4209,15 @@ def extract_image(self, xref):
42104209 o = mupdf.pdf_dict_geta(obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
42114210 if o.m_internal:
42124211 smask = mupdf.pdf_to_num(o)
4213-
4214- if mupdf.pdf_is_jpx_image(obj):
4215- img_type = mupdf.FZ_IMAGE_JPX
4216- res = mupdf.pdf_load_stream(obj)
4217- ext = "jpx"
4218- if JM_is_jbig2_image(obj):
4219- img_type = mupdf.FZ_IMAGE_JBIG2
4220- res = mupdf.pdf_load_stream(obj)
4221- ext = "jb2"
4222- res = mupdf.pdf_load_raw_stream(obj)
4223- if img_type == mupdf.FZ_IMAGE_UNKNOWN:
4224- res = mupdf.pdf_load_raw_stream(obj)
4225- _, c = mupdf.fz_buffer_storage(res)
4226- #log( '{=_ c}')
4227- img_type = mupdf.fz_recognize_image_format(c)
4228- ext = JM_image_extension(img_type)
4229- if img_type == mupdf.FZ_IMAGE_UNKNOWN:
4230- res = None
4231- img = mupdf.pdf_load_image(pdf, obj)
4232- ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)
4233- if (ll_cbuf
4234- and ll_cbuf.params.type not in (
4235- mupdf.FZ_IMAGE_RAW,
4236- mupdf.FZ_IMAGE_FAX,
4237- mupdf.FZ_IMAGE_FLATE,
4238- mupdf.FZ_IMAGE_LZW,
4239- mupdf.FZ_IMAGE_RLD,
4240- )
4241- ):
4242- img_type = ll_cbuf.params.type
4243- ext = JM_image_extension(img_type)
4244- res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))
4245- else:
4246- res = mupdf.fz_new_buffer_from_image_as_png(
4247- img,
4248- mupdf.FzColorParams(mupdf.fz_default_color_params),
4249- )
4250- ext = "png"
42514212 else:
4252- img = mupdf.fz_new_image_from_buffer(res)
4253-
4254- xres, yres = mupdf.fz_image_resolution(img)
4255- width = img.w()
4256- height = img.h()
4257- colorspace = img.n()
4258- bpc = img.bpc()
4259- cs_name = mupdf.fz_colorspace_name(img.colorspace())
4213+ smask = 0
42604214
4215+ # load the image
4216+ img = mupdf.pdf_load_image(pdf, obj)
42614217 rc = dict()
4262- rc[ dictkey_ext] = ext
4263- rc[ dictkey_smask] = smask
4264- rc[ dictkey_width] = width
4265- rc[ dictkey_height] = height
4266- rc[ dictkey_colorspace] = colorspace
4267- rc[ dictkey_bpc] = bpc
4268- rc[ dictkey_xres] = xres
4269- rc[ dictkey_yres] = yres
4270- rc[ dictkey_cs_name] = cs_name
4271- rc[ dictkey_image] = JM_BinFromBuffer(res)
4218+ _make_image_dict(img, rc)
4219+ rc[dictkey_smask] = smask
4220+ rc[dictkey_cs_name] = mupdf.fz_colorspace_name(img.colorspace())
42724221 return rc
42734222
42744223 def ez_save(
@@ -16323,19 +16272,6 @@ def JM_irect_from_py(r):
1632316272 f[i] = FZ_MAX_INF_RECT
1632416273 return mupdf.fz_make_irect(f[0], f[1], f[2], f[3])
1632516274
16326-
16327- def JM_is_jbig2_image(dict_):
16328- # fixme: should we remove this function?
16329- return 0
16330- #filter_ = pdf_dict_get(ctx, dict_, PDF_NAME(Filter));
16331- #if (pdf_name_eq(ctx, filter_, PDF_NAME(JBIG2Decode)))
16332- # return 1;
16333- #n = pdf_array_len(ctx, filter_);
16334- #for (i = 0; i < n; i++)
16335- # if (pdf_name_eq(ctx, pdf_array_get(ctx, filter_, i), PDF_NAME(JBIG2Decode)))
16336- # return 1;
16337- #return 0;
16338-
1633916275def JM_listbox_value( annot):
1634016276 '''
1634116277 ListBox retrieve value
@@ -16533,38 +16469,52 @@ def __str__(self):
1653316469 line_dict[dictkey_spans] = span_list
1653416470 return line_rect
1653516471
16472+ def _make_image_dict(img, img_dict):
16473+ """Populate a dictionary with information extracted from a given image.
1653616474
16537- def JM_make_image_block(block, block_dict):
16538- image = block.i_image()
16539- n = mupdf.fz_colorspace_n(image.colorspace())
16540- w = image.w ()
16541- h = image.h( )
16542- type_ = mupdf.FZ_IMAGE_UNKNOWN
16543- # fz_compressed_image_buffer() is not available because
16544- # `fz_compressed_buffer` is not copyable.
16545- ll_fz_compressed_buffer = mupdf.ll_fz_compressed_image_buffer(image.m_internal)
16546- if ll_fz_compressed_buffer:
16547- type_ = ll_fz_compressed_buffer.params.type
16548- if type_ < mupdf.FZ_IMAGE_BMP or type_ == mupdf.FZ_IMAGE_JBIG2:
16549- type_ = mupdf.FZ_IMAGE_UNKNOWN
16550- bytes_ = None
16551- if ll_fz_compressed_buffer and type_ != mupdf.FZ_IMAGE_UNKNOWN:
16552- buf = mupdf.FzBuffer( mupdf.ll_fz_keep_buffer( ll_fz_compressed_buffer.buffer))
16553- ext = JM_image_extension(type_)
16554- else:
16555- buf = mupdf.fz_new_buffer_from_image_as_png(image, mupdf.FzColorParams() )
16475+ Used by 'Document.extract_image' and by 'JM_make_image_block'.
16476+ Both of these functions will add some more specific information.
16477+ """
16478+ img_type = img.fz_compressed_image_type ()
16479+ ext = JM_image_extension(img_type )
16480+
16481+ # compressed image buffer if present, else None
16482+ ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)
16483+
16484+ if (0
16485+ or not ll_cbuf
16486+ or img_type in ( mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
16487+ or img_type < mupdf.FZ_IMAGE_BMP
16488+ ):
16489+ # not an image with a compressed buffer: convert to PNG
16490+ res = mupdf.fz_new_buffer_from_image_as_png(
16491+ img,
16492+ mupdf.FzColorParams(mupdf.fz_default_color_params),
16493+ )
1655616494 ext = "png"
16557- bytes_ = JM_BinFromBuffer(buf)
16558- block_dict[ dictkey_width] = w
16559- block_dict[ dictkey_height] = h
16560- block_dict[ dictkey_ext] = ext
16561- block_dict[ dictkey_colorspace] = n
16562- block_dict[ dictkey_xres] = image.xres()
16563- block_dict[ dictkey_yres] = image.yres()
16564- block_dict[ dictkey_bpc] = image.bpc()
16565- block_dict[ dictkey_matrix] = JM_py_from_matrix(block.i_transform())
16566- block_dict[ dictkey_size] = len(bytes_)
16567- block_dict[ dictkey_image] = bytes_
16495+ elif ext == "jpeg" and img.n() == 4:
16496+ # JPEG with CMYK: invert colors
16497+ res = mupdf.fz_new_buffer_from_image_as_jpeg(
16498+ img, mupdf.FzColorParams(mupdf.fz_default_color_params), 95, 1)
16499+ else:
16500+ # copy the compressed buffer
16501+ res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))
16502+
16503+ bytes_ = JM_BinFromBuffer(res)
16504+ img_dict[dictkey_width] = img.w()
16505+ img_dict[dictkey_height] = img.h()
16506+ img_dict[dictkey_ext] = ext
16507+ img_dict[dictkey_colorspace] = img.n()
16508+ img_dict[dictkey_xres] = img.xres()
16509+ img_dict[dictkey_yres] = img.yres()
16510+ img_dict[dictkey_bpc] = img.bpc()
16511+ img_dict[dictkey_size] = len(bytes_)
16512+ img_dict[dictkey_image] = bytes_
16513+
16514+ def JM_make_image_block(block, block_dict):
16515+ img = block.i_image()
16516+ _make_image_dict(img, block_dict)
16517+ block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
1656816518
1656916519
1657016520def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
0 commit comments