Skip to content

Commit caab2c5

Browse files
authored
[read-fonts] extend support for cmap 6 and 10 (googlefonts#1655)
Adds support for mapping/iteration for cmap formats 6 and 10.
1 parent 707ebc8 commit caab2c5

File tree

7 files changed

+251
-1
lines changed

7 files changed

+251
-1
lines changed

font-test-data/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ pub static CMAP14_FONT1: &[u8] = include_bytes!("../test_data/ttf/cmap14_font1.t
1414

1515
pub static CMAP4_SYMBOL_PUA: &[u8] = include_bytes!("../test_data/ttf/cmap4_symbol_pua.ttf");
1616

17+
/// Test font with a hand rolled cmap format 6 subtable, used for testing
/// format 6 mapping and iteration.
pub static CMAP6: &[u8] = include_bytes!("../test_data/ttf/cmap6.ttf");
18+
19+
/// Test font with a hand rolled cmap format 10 subtable, used for testing
/// format 10 mapping and iteration.
pub static CMAP10: &[u8] = include_bytes!("../test_data/ttf/cmap10.ttf");
20+
1721
pub static COLR_GRADIENT_RECT: &[u8] =
1822
include_bytes!("../test_data/ttf/linear_gradient_rect_colr_1.ttf");
1923

font-test-data/test_data/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ Describes the provenance, usage and generation procedures for font data used for
124124
```shell
125125
pyftsubset cousine-regular.ttf --gids=85
126126
```
127+
* _cmap6_
128+
* font: Hand rolled cmap 6 subtable
129+
* usage: testing cmap format 6 mapping and iteration
130+
131+
* _cmap10_
132+
* font: Hand rolled cmap 10 subtable
133+
* usage: testing cmap format 10 mapping and iteration
127134

128135
## rebuilding
129136
To update the binaries and extracted data, run script located at `resources/test_fonts/rebuild.sh`
164 Bytes
Binary file not shown.
156 Bytes
Binary file not shown.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="4.37">
3+
<head>
4+
<!-- Most of this table will be recalculated by the compiler -->
5+
<tableVersion value="1.0"/>
6+
<fontRevision value="1.0"/>
7+
<checkSumAdjustment value="0xe34c4a3"/>
8+
<magicNumber value="0x5f0f3cf5"/>
9+
<flags value="00000000 00000011"/>
10+
<unitsPerEm value="1024"/>
11+
<created value="Sat Nov 5 18:46:15 2022"/>
12+
<modified value="Sat Nov 5 18:46:15 2022"/>
13+
<xMin value="51"/>
14+
<yMin value="-250"/>
15+
<xMax value="998"/>
16+
<yMax value="950"/>
17+
<macStyle value="00000000 00000011"/>
18+
<lowestRecPPEM value="6"/>
19+
<fontDirectionHint value="2"/>
20+
<indexToLocFormat value="0"/>
21+
<glyphDataFormat value="0"/>
22+
</head>
23+
24+
<GlyphOrder>
25+
<!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
26+
<GlyphID id="0" name=".notdef"/>
27+
<GlyphID id="1" name="g1"/>
28+
<GlyphID id="2" name="g2"/>
29+
<GlyphID id="3" name="g3"/>
30+
<GlyphID id="4" name="g4"/>
31+
<GlyphID id="5" name="g5"/>
32+
</GlyphOrder>
33+
34+
<maxp>
35+
<tableVersion value="0x5000"/>
36+
<numGlyphs value="100"/>
37+
</maxp>
38+
39+
<cmap>
40+
<tableVersion version="0"/>
41+
<cmap_format_10 platformID="3" platEncID="1">
42+
000a0000 0000001a 00000000 00109423
43+
00000003 001a001b 0020
44+
</cmap_format_10>
45+
</cmap>
46+
47+
</ttFont>
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="4.37">
3+
<head>
4+
<!-- Most of this table will be recalculated by the compiler -->
5+
<tableVersion value="1.0"/>
6+
<fontRevision value="1.0"/>
7+
<checkSumAdjustment value="0xe34c4a3"/>
8+
<magicNumber value="0x5f0f3cf5"/>
9+
<flags value="00000000 00000011"/>
10+
<unitsPerEm value="1024"/>
11+
<created value="Sat Nov 5 18:46:15 2022"/>
12+
<modified value="Sat Nov 5 18:46:15 2022"/>
13+
<xMin value="51"/>
14+
<yMin value="-250"/>
15+
<xMax value="998"/>
16+
<yMax value="950"/>
17+
<macStyle value="00000000 00000011"/>
18+
<lowestRecPPEM value="6"/>
19+
<fontDirectionHint value="2"/>
20+
<indexToLocFormat value="0"/>
21+
<glyphDataFormat value="0"/>
22+
</head>
23+
24+
<GlyphOrder>
25+
<!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
26+
<GlyphID id="0" name=".notdef"/>
27+
<GlyphID id="1" name="g1"/>
28+
<GlyphID id="2" name="g2"/>
29+
<GlyphID id="3" name="g3"/>
30+
<GlyphID id="4" name="g4"/>
31+
<GlyphID id="5" name="g5"/>
32+
</GlyphOrder>
33+
34+
<maxp>
35+
<tableVersion value="0x5000"/>
36+
<numGlyphs value="100"/>
37+
</maxp>
38+
39+
<cmap>
40+
<tableVersion version="0"/>
41+
<cmap_format_6 platformID="3" platEncID="1" language="0">
42+
<map code="0x1723" name="g1"/>
43+
<map code="0x1724" name="g2"/>
44+
<map code="0x1725" name="g3"/>
45+
<map code="0x1726" name="g4"/>
46+
<map code="0x1727" name="g5"/>
47+
</cmap_format_6>
48+
</cmap>
49+
50+
</ttFont>

read-fonts/src/tables/cmap.rs

Lines changed: 143 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ impl Iterator for Cmap4Iter<'_> {
302302
}
303303
}
304304

305-
impl Cmap6<'_> {
305+
impl<'a> Cmap6<'a> {
306306
pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
307307
let codepoint = codepoint.into();
308308

@@ -312,6 +312,76 @@ impl Cmap6<'_> {
312312
.get(idx as usize)
313313
.map(|g| GlyphId::new(g.get() as u32))
314314
}
315+
316+
/// Returns an iterator over all (codepoint, glyph identifier) pairs
317+
/// in the subtable.
318+
pub fn iter(&self) -> Cmap6Iter<'a> {
319+
Cmap6Iter {
320+
first: self.first_code() as u32,
321+
glyph_ids: self.glyph_id_array(),
322+
pos: 0,
323+
}
324+
}
325+
}
326+
327+
/// Iterator over all (codepoint, glyph identifier) pairs in
328+
/// the subtable.
329+
#[derive(Clone)]
330+
pub struct Cmap6Iter<'a> {
331+
first: u32,
332+
glyph_ids: &'a [BigEndian<u16>],
333+
pos: u32,
334+
}
335+
336+
impl Iterator for Cmap6Iter<'_> {
337+
type Item = (u32, GlyphId);
338+
339+
fn next(&mut self) -> Option<Self::Item> {
340+
let gid = self.glyph_ids.get(self.pos as usize)?.get().into();
341+
let codepoint = self.first + self.pos;
342+
self.pos += 1;
343+
Some((codepoint, gid))
344+
}
345+
}
346+
347+
impl<'a> Cmap10<'a> {
348+
pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
349+
let codepoint = codepoint.into();
350+
let idx = codepoint.checked_sub(self.start_char_code())?;
351+
self.glyph_id_array()
352+
.get(idx as usize)
353+
.map(|g| GlyphId::new(g.get() as u32))
354+
}
355+
356+
/// Returns an iterator over all (codepoint, glyph identifier) pairs
357+
/// in the subtable.
358+
pub fn iter(&self) -> Cmap10Iter<'a> {
359+
Cmap10Iter {
360+
first: self.start_char_code(),
361+
glyph_ids: self.glyph_id_array(),
362+
pos: 0,
363+
}
364+
}
365+
}
366+
367+
/// Iterator over all (codepoint, glyph identifier) pairs in
368+
/// the subtable.
369+
#[derive(Clone)]
370+
pub struct Cmap10Iter<'a> {
371+
first: u32,
372+
glyph_ids: &'a [BigEndian<u16>],
373+
pos: u32,
374+
}
375+
376+
impl Iterator for Cmap10Iter<'_> {
377+
type Item = (u32, GlyphId);
378+
379+
fn next(&mut self) -> Option<Self::Item> {
380+
let gid = self.glyph_ids.get(self.pos as usize)?.get().into();
381+
let codepoint = self.first + self.pos;
382+
self.pos += 1;
383+
Some((codepoint, gid))
384+
}
315385
}
316386

317387
/// Trait to unify constant and sequential map groups.
@@ -1005,6 +1075,78 @@ mod tests {
10051075
assert_eq!(mappings, &[(259, 236), (262, 326), (65535, 0)]);
10061076
}
10071077

1078+
/// Expected (codepoint, glyph id) pairs for the `CMAP6` test font.
const CMAP6_PAIRS: &[(u32, u32)] = &[
    (0x1723, 1),
    (0x1724, 2),
    (0x1725, 3),
    (0x1726, 4),
    (0x1727, 5),
];
1085+
1086+
/// Every expected codepoint must map to its glyph, and the codepoints just
/// outside the covered range must be unmapped.
#[test]
fn cmap6_map() {
    let font = FontRef::new(font_test_data::CMAP6).unwrap();
    let cmap = font.cmap().unwrap();
    let cmap6 = match cmap.subtable(0).unwrap() {
        CmapSubtable::Format6(subtable) => subtable,
        _ => panic!("should be a format 6 subtable"),
    };
    for &(ch, gid) in CMAP6_PAIRS {
        assert_eq!(cmap6.map_codepoint(ch).unwrap().to_u32(), gid);
    }
    // Codepoints immediately below and above the mapped range must miss.
    let (lowest, _) = CMAP6_PAIRS[0];
    let (highest, _) = CMAP6_PAIRS.last().copied().unwrap();
    assert!(cmap6.map_codepoint(lowest - 1).is_none());
    assert!(cmap6.map_codepoint(highest + 1).is_none());
}
1102+
1103+
/// Iterating the format 6 subtable must reproduce `CMAP6_PAIRS` in order.
#[test]
fn cmap6_iter() {
    let font = FontRef::new(font_test_data::CMAP6).unwrap();
    let cmap = font.cmap().unwrap();
    let cmap6 = match cmap.subtable(0).unwrap() {
        CmapSubtable::Format6(subtable) => subtable,
        _ => panic!("should be a format 6 subtable"),
    };
    let pairs: Vec<(u32, u32)> = cmap6
        .iter()
        .map(|(ch, gid)| (ch, gid.to_u32()))
        .collect();
    assert_eq!(pairs, CMAP6_PAIRS);
}
1116+
1117+
/// Expected (codepoint, glyph id) pairs for the `CMAP10` test font.
const CMAP10_PAIRS: &[(u32, u32)] = &[(0x109423, 26), (0x109424, 27), (0x109425, 32)];
1118+
1119+
/// Every expected codepoint must map to its glyph, and the codepoints just
/// outside the covered range must be unmapped.
#[test]
fn cmap10_map() {
    let font = FontRef::new(font_test_data::CMAP10).unwrap();
    let cmap = font.cmap().unwrap();
    let cmap10 = match cmap.subtable(0).unwrap() {
        CmapSubtable::Format10(subtable) => subtable,
        _ => panic!("should be a format 10 subtable"),
    };
    for &(ch, gid) in CMAP10_PAIRS {
        assert_eq!(cmap10.map_codepoint(ch).unwrap().to_u32(), gid);
    }
    // Codepoints immediately below and above the mapped range must miss.
    let (lowest, _) = CMAP10_PAIRS[0];
    let (highest, _) = CMAP10_PAIRS.last().copied().unwrap();
    assert!(cmap10.map_codepoint(lowest - 1).is_none());
    assert!(cmap10.map_codepoint(highest + 1).is_none());
}
1135+
1136+
/// Iterating the format 10 subtable must reproduce `CMAP10_PAIRS` in order.
#[test]
fn cmap10_iter() {
    let font = FontRef::new(font_test_data::CMAP10).unwrap();
    let cmap = font.cmap().unwrap();
    let cmap10 = match cmap.subtable(0).unwrap() {
        CmapSubtable::Format10(subtable) => subtable,
        _ => panic!("should be a format 10 subtable"),
    };
    let pairs: Vec<(u32, u32)> = cmap10
        .iter()
        .map(|(ch, gid)| (ch, gid.to_u32()))
        .collect();
    assert_eq!(pairs, CMAP10_PAIRS);
}
1149+
10081150
#[test]
10091151
fn cmap12_iter() {
10101152
let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();

0 commit comments

Comments
 (0)