Skip to content

Commit b9e9114

Browse files
committed
PDFBOX-5920: try to get space width by encoding first; add test by Miroslav Holubec
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1922425 13f79535-47bb-0310-9956-ffa450edef68
1 parent b0245c2 commit b9e9114

File tree

2 files changed

+56
-4
lines changed

2 files changed

+56
-4
lines changed

pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -552,8 +552,8 @@ public Matrix getFontMatrix()
552552
}
553553

554554
/**
555-
* Determines the width of the space character.
556-
*
555+
* Determines the width of the space character. This is very important for text extraction.
556+
*
557557
* @return the width of the space character
558558
*/
559559
public float getSpaceWidth()
@@ -572,7 +572,21 @@ public float getSpaceWidth()
572572
}
573573
else
574574
{
575-
fontWidthOfSpace = getWidth(32);
575+
try
576+
{
577+
// PDFBOX-5920: try via the font's encoding first, which yields the correct character code for space
578+
fontWidthOfSpace = getStringWidth(" ");
579+
}
580+
catch (IllegalArgumentException | UnsupportedOperationException ex)
581+
{
582+
// Happens if space is not available in the font
583+
// or if encoding isn't implemented
584+
LOG.debug(ex.getMessage(), ex);
585+
}
586+
if (fontWidthOfSpace <= 0)
587+
{
588+
fontWidthOfSpace = getWidth(32);
589+
}
576590
}
577591

578592
// try to get it from the font itself
@@ -588,9 +602,10 @@ public float getSpaceWidth()
588602
}
589603
catch (Exception e)
590604
{
591-
LOG.error("Can't determine the width of the space character, assuming 250", e);
605+
LOG.error("Can't determine the width of the space character for font {}, assuming 250", getName(),e);
592606
fontWidthOfSpace = 250f;
593607
}
608+
LOG.debug("Space width for font {} is {}", getName(), fontWidthOfSpace);
594609
}
595610
return fontWidthOfSpace;
596611
}

pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,4 +462,41 @@ void testPDFBox5484() throws IOException
462462
assertTrue(new Area(path1).equals(new Area(path2))); // assertEquals does not test equals()
463463
}
464464
}
465+
466+
/**
467+
* PDFBOX-5920: check the string width and the space width of a font loaded as Type 0.
468+
*
469+
* @throws IOException if loading the font or measuring the text fails
470+
*/
471+
@Test
472+
void PDFBOX5920Type0() throws IOException
473+
{
474+
try (InputStream is =
475+
PDFontTest.class.getResourceAsStream("/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf");
476+
PDDocument document = new PDDocument())
477+
{
478+
PDFont font = PDType0Font.load(document, is, false);
479+
assertEquals(20064.0f, font.getStringWidth("The quick brown fox jumps over the lazy dog."));
480+
assertEquals(278.0f, font.getSpaceWidth());
481+
}
482+
}
483+
484+
/**
485+
* PDFBOX-5920: check the string width and the space width of a font loaded as TrueType with WinAnsiEncoding.
486+
*
487+
* @throws IOException if loading the font or measuring the text fails
488+
*/
489+
@Test
490+
void PDFBOX5920TrueType() throws IOException
491+
{
492+
try (InputStream is =
493+
PDFontTest.class.getResourceAsStream("/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf");
494+
PDDocument document = new PDDocument())
495+
{
496+
PDFont font = PDTrueTypeFont.load(document, is, WinAnsiEncoding.INSTANCE);
497+
assertEquals(20064.0f,
498+
font.getStringWidth("The quick brown fox jumps over the lazy dog."));
499+
assertEquals(278.0f, font.getSpaceWidth());
500+
}
501+
}
465502
}

0 commit comments

Comments
 (0)