Skip to content

Commit 024872a

Browse files
docs/ src/ tests/: Page.insert_htmlbox(): also down-scale to fit long words.
We use mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW with fz_place_story_flags(). Also updated test_htmlbox1() to match new behaviour where Page.insert_htmlbox() returns with a small positive <height> on success.
1 parent 0d63649 commit 024872a

File tree

4 files changed

+155
-34
lines changed

4 files changed

+155
-34
lines changed

docs/page.rst

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -847,14 +847,24 @@ In a nutshell, this is what you can do with PyMuPDF:
847847

848848
:returns: A tuple of floats `(spare_height, scale)`.
849849

850-
- `spare_height`: -1 if content did not fit, else >= 0. It is the height of the unused (still available) rectangle stripe. Positive only if scale = 1 (no down-scaling happened).
851-
- `scale`: down-scaling factor, 0 < scale <= 1.
850+
- spare_height: The (non-negative) height of the remaining space in `rect` below the
851+
text, or -1 if we failed to fit.
852+
- scale: The scaling required; `0 < scale <= 1`. Will be `scale_low`
853+
if we failed to fit.
852854

853-
Please refer to examples in this section of the recipes: :ref:`RecipesText_I_c`.
855+
Please refer to examples in this section of the recipes: :ref:`RecipesText_I_c`.
854856

855857
|history_begin|
856858

857-
* New in v1.23.8; rebased-only.
859+
* Changed in v1.26.5:
860+
861+
* do additional scaling to fit long words.
862+
*
863+
If we succeeded and scaled down, the returned `spare_height` is now
864+
generally positive instead of being fixed to zero, because the final
865+
rect's height is usually not an exact multiple of the font line
866+
height.
867+
* New in v1.23.8: rebased-only.
858868
* New in v1.23.9: `opacity` parameter.
859869

860870
|history_end|

src/__init__.py

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12266,7 +12266,9 @@ def insert_htmlbox(
1226612266
oc=0,
1226712267
opacity=1,
1226812268
overlay=True,
12269-
) -> float:
12269+
_scale_word_width=True,
12270+
_verbose=False,
12271+
) -> tuple:
1227012272
"""Insert text with optional HTML tags and stylings into a rectangle.
1227112273

1227212274
Args:
@@ -12282,14 +12284,17 @@ def insert_htmlbox(
1228212284
oc: (int) the xref of an OCG / OCMD (Optional Content).
1228312285
opacity: (float) set opacity of inserted content.
1228412286
overlay: (bool) put text on top of page content.
12287+
_scale_word_width: internal, for testing only.
12288+
_verbose: internal, for testing only.
1228512289
Returns:
1228612290
A tuple of floats (spare_height, scale).
12287-
spare_height: -1 if content did not fit, else >= 0. It is the height of the
12288-
unused (still available) rectangle stripe. Positive only if
12289-
scale_min = 1 (no down scaling).
12290-
scale: downscaling factor, 0 < scale <= 1. Set to 0 if spare_height = -1 (no fit).
12291+
spare_height:
12292+
The height of the remaining space in <rect> below the
12293+
text, or -1 if we failed to fit.
12294+
scale:
12295+
The scaling required; `0 < scale <= 1`.
12296+
Will be `scale_low` if we failed to fit.
1229112297
"""
12292-
1229312298
# normalize rotation angle
1229412299
if not rotate % 90 == 0:
1229512300
raise ValueError("bad rotation angle")
@@ -12320,25 +12325,40 @@ def insert_htmlbox(
1232012325
story = text
1232112326
else:
1232212327
raise ValueError("'text' must be a string or a Story")
12328+
1232312329
# ----------------------------------------------------------------
12324-
# Find a scaling factor that lets our story fit in
12330+
# Find a scaling factor that lets our story fit in. Instead of scaling
12331+
# the text smaller, we look at how much bigger the rect needs
12332+
# to be to fit the text, then reverse the scaling to get how much we
12333+
# need to scale down the text.
1232512334
# ----------------------------------------------------------------
12326-
scale_max = None if scale_low == 0 else 1 / scale_low
12327-
12328-
fit = story.fit_scale(temp_rect, scale_min=1, scale_max=scale_max)
12335+
rect_scale_max = None if scale_low == 0 else 1 / scale_low
12336+
12337+
fit = story.fit_scale(
12338+
temp_rect,
12339+
scale_min=1,
12340+
scale_max=rect_scale_max,
12341+
flags=mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW if _scale_word_width else 0,
12342+
verbose=_verbose,
12343+
)
12344+
1232912345
if not fit.big_enough: # there was no fit
12330-
return (-1, scale_low)
12331-
12332-
filled = fit.filled
12333-
scale = 1 / fit.parameter # shrink factor
12334-
12335-
spare_height = fit.rect.y1 - filled[3] # unused room at rectangle bottom
12336-
# Note: due to MuPDF's logic this may be negative even for successful fits.
12337-
if scale != 1 or spare_height < 0: # if scaling occurred, set spare_height to 0
12338-
spare_height = 0
12346+
scale = 1 / fit.parameter
12347+
return (-1, scale)
12348+
12349+
# fit.filled is a tuple; we convert it in place to a Rect for
12350+
# convenience. (fit.rect is already a Rect.)
12351+
fit.filled = Rect(fit.filled)
12352+
assert (fit.rect.x0, fit.rect.y0) == (0, 0)
12353+
assert (fit.filled.x0, fit.filled.y0) == (0, 0)
12354+
12355+
scale = 1 / fit.parameter
12356+
assert scale >= scale_low, f'{scale_low=} {scale=}'
12357+
12358+
spare_height = max((fit.rect.y1 - fit.filled.y1) * scale, 0)
1233912359

1234012360
def rect_function(*args):
12341-
return fit.rect, fit.rect, Identity
12361+
return fit.rect, fit.rect, None
1234212362

1234312363
# draw story on temp PDF page
1234412364
doc = story.write_with_links(rect_function)
@@ -15925,10 +15945,13 @@ class Position2:
1592515945
function( position2)
1592615946
mupdf.fz_story_positions( self.this, function2)
1592715947

15928-
def place( self, where):
15948+
def place( self, where, flags=0):
15949+
'''
15950+
Wrapper for fz_place_story_flags().
15951+
'''
1592915952
where = JM_rect_from_py( where)
1593015953
filled = mupdf.FzRect()
15931-
more = mupdf.fz_place_story( self.this, where, filled)
15954+
more = mupdf.fz_place_story_flags( self.this, where, filled, flags)
1593215955
return more, JM_py_from_rect( filled)
1593315956

1593415957
def reset( self):
@@ -16045,15 +16068,17 @@ class FitResult:
1604516068
`big_enough`:
1604616069
`True` if the fit succeeded.
1604716070
`filled`:
16048-
From the last call to `Story.place()`.
16071+
Tuple (x0, y0, x1, y1) from the last call to `Story.place()`. This
16072+
will be wider than .rect if any single word (which we never split)
16073+
was too wide for .rect.
1604916074
`more`:
1605016075
`False` if the fit succeeded.
1605116076
`numcalls`:
1605216077
Number of calls made to `self.place()`.
1605316078
`parameter`:
1605416079
The successful parameter value, or the largest failing value.
1605516080
`rect`:
16056-
The rect created from `parameter`.
16081+
The pymupdf.Rect created from `parameter`.
1605716082
'''
1605816083
def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
1605916084
self.big_enough = big_enough
@@ -16073,7 +16098,7 @@ def __repr__(self):
1607316098
f' rect={self.rect}'
1607416099
)
1607516100

16076-
def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
16101+
def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False, flags=0):
1607716102
'''
1607816103
Finds optimal rect that contains the story `self`.
1607916104

@@ -16100,6 +16125,9 @@ def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
1610016125
Maximum error in returned `parameter`.
1610116126
:arg verbose:
1610216127
If true we output diagnostics.
16128+
:arg flags:
16129+
Passed to mupdf.fz_place_story_flags(). e.g.
16130+
zero or `mupdf.FZ_PLACE_STORY_FLAG_NO_OVERFLOW`.
1610316131
'''
1610416132
def log(text):
1610516133
assert verbose
@@ -16155,7 +16183,7 @@ def update(parameter):
1615516183
if verbose:
1615616184
log(f'update(): not calling self.place() because rect is empty.')
1615716185
else:
16158-
more, filled = self.place(rect)
16186+
more, filled = self.place(rect, flags)
1615916187
state.numcalls += 1
1616016188
big_enough = not more
1616116189
result = Story.FitResult(
@@ -16224,12 +16252,12 @@ def opposite(p, direction):
1622416252
parameter = (state.pmin + state.pmax) / 2
1622516253
update(parameter)
1622616254

16227-
def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
16255+
def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False, flags=0):
1622816256
'''
1622916257
Finds smallest value `scale` in range `scale_min..scale_max` where
1623016258
`scale * rect` is large enough to contain the story `self`.
1623116259

16232-
Returns a `Story.FitResult` instance.
16260+
Returns a `Story.FitResult` instance with `.parameter` set to `scale`.
1623316261

1623416262
:arg width:
1623516263
width of rect.
@@ -16244,13 +16272,15 @@ def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=Fals
1624416272
Maximum error in returned scale.
1624516273
:arg verbose:
1624616274
If true we output diagnostics.
16275+
:arg flags:
16276+
Passed to Story.place().
1624716277
'''
1624816278
x0, y0, x1, y1 = rect
1624916279
width = x1 - x0
1625016280
height = y1 - y0
1625116281
def fn(scale):
1625216282
return Rect(x0, y0, x0 + scale*width, y0 + scale*height)
16253-
return self.fit(fn, scale_min, scale_max, delta, verbose)
16283+
return self.fit(fn, scale_min, scale_max, delta, verbose, flags)
1625416284

1625516285
def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
1625616286
'''

tests/resources/test_4613.png

64.8 KB
Loading

tests/test_textbox.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
"""
88
import pymupdf
99

10+
import gentle_compare
11+
12+
import os
13+
import textwrap
14+
1015
# codespell:ignore-begin
1116
text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.
1217
@@ -182,7 +187,9 @@ def test_htmlbox1():
182187
assert spare_height < 0
183188
assert scale == 1
184189
spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0)
185-
assert spare_height == 0
190+
page.draw_rect(rect, (1, 0, 0))
191+
doc.save(os.path.normpath(f'{__file__}/../../tests/test_htmlbox1.pdf'))
192+
assert abs(spare_height - 3.8507) < 0.001
186193
assert 0 < scale < 1
187194
page = doc.reload_page(page)
188195
link = page.get_links()[0] # extracts the links on the page
@@ -286,3 +293,77 @@ def test_4400():
286293
text = '111111111'
287294
print(f'Calling writer.fill_textbox().', flush=1)
288295
writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8)
296+
297+
298+
def test_4613():
299+
print()
300+
text = 3 * 'abcdefghijklmnopqrstuvwxyz\nABCDEFGHIJKLMNOPQRSTUVWXYZ\n'
301+
story = pymupdf.Story(text)
302+
rect = pymupdf.Rect(10, 10, 100, 100)
303+
304+
# Test default operation where we get additional scaling down because of
305+
# the long words in our text.
306+
print(f'test_4613(): ### Testing default operation.')
307+
with pymupdf.open() as doc:
308+
page = doc.new_page()
309+
spare_height, scale = page.insert_htmlbox(rect, story)
310+
print(f'test_4613(): {spare_height=} {scale=}')
311+
# The additional down-scaling from the long word widths results in
312+
# spare vertical space.
313+
page.draw_rect(rect, (1, 0, 0))
314+
path = os.path.normpath(f'{__file__}/../../tests/test_4613.pdf')
315+
doc.save(path)
316+
317+
path_pixmap = os.path.normpath(f'{__file__}/../../tests/test_4613.png')
318+
path_pixmap_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4613.png')
319+
pixmap = page.get_pixmap(dpi=300)
320+
pixmap.save(path_pixmap)
321+
322+
pixmap_diff = gentle_compare.pixmaps_diff(path_pixmap_expected, pixmap)
323+
pixmap_diff.save(os.path.normpath(f'{__file__}/../../tests/test_4613-diff.png'))
324+
325+
rms = gentle_compare.pixmaps_rms(pixmap, path_pixmap_expected)
326+
print(f'{rms=}')
327+
assert rms == 0, f'{rms=}'
328+
329+
assert abs(spare_height - 45.7536) < 0.1
330+
assert abs(scale - 0.4009) < 0.01
331+
332+
new_text = page.get_text('text', clip=rect)
333+
print(f'test_4613(): new_text:')
334+
print(textwrap.indent(new_text, ' '))
335+
assert new_text == text
336+
337+
# Check with _scale_word_width=False - ignore too-wide words.
338+
print(f'test_4613(): ### Testing with _scale_word_width=False.')
339+
with pymupdf.open() as doc:
340+
page = doc.new_page()
341+
spare_height, scale = page.insert_htmlbox(rect, story, _scale_word_width=False)
342+
print(f'test_4613(): _scale_word_width=False: {spare_height=} {scale=}')
343+
# With _scale_word_width=False we allow long words to extend beyond the
344+
# rect, so we should have spare_height == 0 and only a small amount of
345+
# down-scaling.
346+
assert spare_height == 0
347+
assert abs(scale - 0.914) < 0.01
348+
new_text = page.get_text('text', clip=rect)
349+
print(f'test_4613(): new_text:')
350+
print(textwrap.indent(new_text, ' '))
351+
assert new_text == textwrap.dedent('''
352+
abcdefghijklmno
353+
ABCDEFGHIJKLM
354+
abcdefghijklmno
355+
ABCDEFGHIJKLM
356+
abcdefghijklmno
357+
ABCDEFGHIJKLM
358+
''')[1:]
359+
360+
361+
# Check that we get no fit if scale_low is not low enough.
362+
print(f'test_4613(): ### Testing with scale_low too high to allow a fit.')
363+
with pymupdf.open() as doc:
364+
page = doc.new_page()
365+
scale_low=0.6
366+
spare_height, scale = page.insert_htmlbox(rect, story, scale_low=scale_low)
367+
print(f'test_4613(): {scale_low=}: {spare_height=} {scale=}')
368+
assert spare_height == -1
369+
assert scale == scale_low

0 commit comments

Comments
 (0)