Skip to content

Commit 82cb474

Browse files
nicoburns authored and PotatoCP committed
Use built-in conversion function from unicode_script to convert script for harfbuzz (servo#38704)
Replaces a big match statement with a call to `unicode_script`'s `Script::short_name` method, which has the same big match statement. We special-case `Script::Unknown` because the `short_name` method returns an empty string for that variant, but harfbuzz represents it with `Zzzz`.

EDIT: `Zzzz` seems to be in the spec, so I've sent a PR to `unicode_script` to use it: unicode-rs/unicode-script#23

EDIT: And unicode-rs/unicode-script#24, which would allow us to remove this method entirely.

Signed-off-by: Nico Burns <[email protected]>
1 parent c277826 commit 82cb474

File tree

1 file changed

+14
-165
lines changed

1 file changed

+14
-165
lines changed

components/fonts/shaper.rs

Lines changed: 14 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use harfbuzz_sys::{
2323
hb_font_destroy, hb_font_funcs_create, hb_font_funcs_set_glyph_h_advance_func,
2424
hb_font_funcs_set_nominal_glyph_func, hb_font_funcs_t, hb_font_set_funcs, hb_font_set_ppem,
2525
hb_font_set_scale, hb_font_t, hb_glyph_info_t, hb_glyph_position_t, hb_ot_layout_get_baseline,
26-
hb_position_t, hb_shape, hb_tag_t,
26+
hb_position_t, hb_script_from_iso15924_tag, hb_shape, hb_tag_t,
2727
};
2828
use log::debug;
2929
use num_traits::Zero;
@@ -201,169 +201,16 @@ impl Shaper {
201201
}
202202
}
203203

204-
pub fn unicode_to_hb_script(script: unicode_script::Script) -> harfbuzz_sys::hb_script_t {
205-
use harfbuzz_sys::*;
206-
use unicode_script::Script::*;
207-
match script {
208-
Adlam => HB_SCRIPT_ADLAM,
209-
Ahom => HB_SCRIPT_AHOM,
210-
Anatolian_Hieroglyphs => HB_SCRIPT_ANATOLIAN_HIEROGLYPHS,
211-
Arabic => HB_SCRIPT_ARABIC,
212-
Armenian => HB_SCRIPT_ARMENIAN,
213-
Avestan => HB_SCRIPT_AVESTAN,
214-
Balinese => HB_SCRIPT_BALINESE,
215-
Bamum => HB_SCRIPT_BAMUM,
216-
Bassa_Vah => HB_SCRIPT_BASSA_VAH,
217-
Batak => HB_SCRIPT_BATAK,
218-
Bengali => HB_SCRIPT_BENGALI,
219-
Bhaiksuki => HB_SCRIPT_BHAIKSUKI,
220-
Bopomofo => HB_SCRIPT_BOPOMOFO,
221-
Brahmi => HB_SCRIPT_BRAHMI,
222-
Braille => HB_SCRIPT_BRAILLE,
223-
Buginese => HB_SCRIPT_BUGINESE,
224-
Buhid => HB_SCRIPT_BUHID,
225-
Canadian_Aboriginal => HB_SCRIPT_CANADIAN_SYLLABICS,
226-
Carian => HB_SCRIPT_CARIAN,
227-
Caucasian_Albanian => HB_SCRIPT_CAUCASIAN_ALBANIAN,
228-
Chakma => HB_SCRIPT_CHAKMA,
229-
Cham => HB_SCRIPT_CHAM,
230-
Cherokee => HB_SCRIPT_CHEROKEE,
231-
Chorasmian => HB_SCRIPT_CHORASMIAN,
232-
Common => HB_SCRIPT_COMMON,
233-
Coptic => HB_SCRIPT_COPTIC,
234-
Cuneiform => HB_SCRIPT_CUNEIFORM,
235-
Cypriot => HB_SCRIPT_CYPRIOT,
236-
Cyrillic => HB_SCRIPT_CYRILLIC,
237-
Deseret => HB_SCRIPT_DESERET,
238-
Devanagari => HB_SCRIPT_DEVANAGARI,
239-
Dives_Akuru => HB_SCRIPT_DIVES_AKURU,
240-
Dogra => HB_SCRIPT_DOGRA,
241-
Duployan => HB_SCRIPT_DUPLOYAN,
242-
Egyptian_Hieroglyphs => HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
243-
Elbasan => HB_SCRIPT_ELBASAN,
244-
Elymaic => HB_SCRIPT_ELYMAIC,
245-
Ethiopic => HB_SCRIPT_ETHIOPIC,
246-
Georgian => HB_SCRIPT_GEORGIAN,
247-
Glagolitic => HB_SCRIPT_GLAGOLITIC,
248-
Gothic => HB_SCRIPT_GOTHIC,
249-
Grantha => HB_SCRIPT_GRANTHA,
250-
Greek => HB_SCRIPT_GREEK,
251-
Gujarati => HB_SCRIPT_GUJARATI,
252-
Gunjala_Gondi => HB_SCRIPT_GUNJALA_GONDI,
253-
Gurmukhi => HB_SCRIPT_GURMUKHI,
254-
Han => HB_SCRIPT_HAN,
255-
Hangul => HB_SCRIPT_HANGUL,
256-
Hanifi_Rohingya => HB_SCRIPT_HANIFI_ROHINGYA,
257-
Hanunoo => HB_SCRIPT_HANUNOO,
258-
Hatran => HB_SCRIPT_HATRAN,
259-
Hebrew => HB_SCRIPT_HEBREW,
260-
Hiragana => HB_SCRIPT_HIRAGANA,
261-
Imperial_Aramaic => HB_SCRIPT_IMPERIAL_ARAMAIC,
262-
Inherited => HB_SCRIPT_INHERITED,
263-
Inscriptional_Pahlavi => HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
264-
Inscriptional_Parthian => HB_SCRIPT_INSCRIPTIONAL_PARTHIAN,
265-
Javanese => HB_SCRIPT_JAVANESE,
266-
Kaithi => HB_SCRIPT_KAITHI,
267-
Kannada => HB_SCRIPT_KANNADA,
268-
Katakana => HB_SCRIPT_KATAKANA,
269-
Kayah_Li => HB_SCRIPT_KAYAH_LI,
270-
Kharoshthi => HB_SCRIPT_KHAROSHTHI,
271-
Khitan_Small_Script => HB_SCRIPT_KHITAN_SMALL_SCRIPT,
272-
Khmer => HB_SCRIPT_KHMER,
273-
Khojki => HB_SCRIPT_KHOJKI,
274-
Khudawadi => HB_SCRIPT_KHUDAWADI,
275-
Lao => HB_SCRIPT_LAO,
276-
Latin => HB_SCRIPT_LATIN,
277-
Lepcha => HB_SCRIPT_LEPCHA,
278-
Limbu => HB_SCRIPT_LIMBU,
279-
Linear_A => HB_SCRIPT_LINEAR_A,
280-
Linear_B => HB_SCRIPT_LINEAR_B,
281-
Lisu => HB_SCRIPT_LISU,
282-
Lycian => HB_SCRIPT_LYCIAN,
283-
Lydian => HB_SCRIPT_LYDIAN,
284-
Mahajani => HB_SCRIPT_MAHAJANI,
285-
Makasar => HB_SCRIPT_MAKASAR,
286-
Malayalam => HB_SCRIPT_MALAYALAM,
287-
Mandaic => HB_SCRIPT_MANDAIC,
288-
Manichaean => HB_SCRIPT_MANICHAEAN,
289-
Marchen => HB_SCRIPT_MARCHEN,
290-
Masaram_Gondi => HB_SCRIPT_MASARAM_GONDI,
291-
Medefaidrin => HB_SCRIPT_MEDEFAIDRIN,
292-
Meetei_Mayek => HB_SCRIPT_MEETEI_MAYEK,
293-
Mende_Kikakui => HB_SCRIPT_MENDE_KIKAKUI,
294-
Meroitic_Cursive => HB_SCRIPT_MEROITIC_CURSIVE,
295-
Meroitic_Hieroglyphs => HB_SCRIPT_MEROITIC_HIEROGLYPHS,
296-
Miao => HB_SCRIPT_MIAO,
297-
Modi => HB_SCRIPT_MODI,
298-
Mongolian => HB_SCRIPT_MONGOLIAN,
299-
Mro => HB_SCRIPT_MRO,
300-
Multani => HB_SCRIPT_MULTANI,
301-
Myanmar => HB_SCRIPT_MYANMAR,
302-
Nabataean => HB_SCRIPT_NABATAEAN,
303-
Nandinagari => HB_SCRIPT_NANDINAGARI,
304-
New_Tai_Lue => HB_SCRIPT_NEW_TAI_LUE,
305-
Newa => HB_SCRIPT_NEWA,
306-
Nko => HB_SCRIPT_NKO,
307-
Nushu => HB_SCRIPT_NUSHU,
308-
Nyiakeng_Puachue_Hmong => HB_SCRIPT_NYIAKENG_PUACHUE_HMONG,
309-
Ogham => HB_SCRIPT_OGHAM,
310-
Ol_Chiki => HB_SCRIPT_OL_CHIKI,
311-
Old_Hungarian => HB_SCRIPT_OLD_HUNGARIAN,
312-
Old_Italic => HB_SCRIPT_OLD_ITALIC,
313-
Old_North_Arabian => HB_SCRIPT_OLD_NORTH_ARABIAN,
314-
Old_Permic => HB_SCRIPT_OLD_PERMIC,
315-
Old_Persian => HB_SCRIPT_OLD_PERSIAN,
316-
Old_Sogdian => HB_SCRIPT_OLD_SOGDIAN,
317-
Old_South_Arabian => HB_SCRIPT_OLD_SOUTH_ARABIAN,
318-
Old_Turkic => HB_SCRIPT_OLD_TURKIC,
319-
Oriya => HB_SCRIPT_ORIYA,
320-
Osage => HB_SCRIPT_OSAGE,
321-
Osmanya => HB_SCRIPT_OSMANYA,
322-
Pahawh_Hmong => HB_SCRIPT_PAHAWH_HMONG,
323-
Palmyrene => HB_SCRIPT_PALMYRENE,
324-
Pau_Cin_Hau => HB_SCRIPT_PAU_CIN_HAU,
325-
Phags_Pa => HB_SCRIPT_PHAGS_PA,
326-
Phoenician => HB_SCRIPT_PHOENICIAN,
327-
Psalter_Pahlavi => HB_SCRIPT_PSALTER_PAHLAVI,
328-
Rejang => HB_SCRIPT_REJANG,
329-
Runic => HB_SCRIPT_RUNIC,
330-
Samaritan => HB_SCRIPT_SAMARITAN,
331-
Saurashtra => HB_SCRIPT_SAURASHTRA,
332-
Sharada => HB_SCRIPT_SHARADA,
333-
Shavian => HB_SCRIPT_SHAVIAN,
334-
Siddham => HB_SCRIPT_SIDDHAM,
335-
SignWriting => HB_SCRIPT_SIGNWRITING,
336-
Sinhala => HB_SCRIPT_SINHALA,
337-
Sogdian => HB_SCRIPT_SOGDIAN,
338-
Sora_Sompeng => HB_SCRIPT_SORA_SOMPENG,
339-
Soyombo => HB_SCRIPT_SOYOMBO,
340-
Sundanese => HB_SCRIPT_SUNDANESE,
341-
Syloti_Nagri => HB_SCRIPT_SYLOTI_NAGRI,
342-
Syriac => HB_SCRIPT_SYRIAC,
343-
Tagalog => HB_SCRIPT_TAGALOG,
344-
Tagbanwa => HB_SCRIPT_TAGBANWA,
345-
Tai_Le => HB_SCRIPT_TAI_LE,
346-
Tai_Tham => HB_SCRIPT_TAI_THAM,
347-
Tai_Viet => HB_SCRIPT_TAI_VIET,
348-
Takri => HB_SCRIPT_TAKRI,
349-
Tamil => HB_SCRIPT_TAMIL,
350-
Tangut => HB_SCRIPT_TANGUT,
351-
Telugu => HB_SCRIPT_TELUGU,
352-
Thaana => HB_SCRIPT_THAANA,
353-
Thai => HB_SCRIPT_THAI,
354-
Tibetan => HB_SCRIPT_TIBETAN,
355-
Tifinagh => HB_SCRIPT_TIFINAGH,
356-
Tirhuta => HB_SCRIPT_TIRHUTA,
357-
Ugaritic => HB_SCRIPT_UGARITIC,
358-
Unknown => HB_SCRIPT_UNKNOWN,
359-
Vai => HB_SCRIPT_VAI,
360-
Warang_Citi => HB_SCRIPT_WARANG_CITI,
361-
Wancho => HB_SCRIPT_WANCHO,
362-
Yezidi => HB_SCRIPT_YEZIDI,
363-
Yi => HB_SCRIPT_YI,
364-
Zanabazar_Square => HB_SCRIPT_ZANABAZAR_SQUARE,
365-
_ => HB_SCRIPT_UNKNOWN,
366-
}
204+
/// Converts a `unicode_script::Script` into a four-byte script tag packed
/// into a `u32`, most significant byte first.
///
/// The tag bytes are the script's `short_name()`, except for
/// `Script::Unknown`, which is mapped to `Zzzz` explicitly.
///
/// # Panics
///
/// Panics if `short_name()` yields a string that is not exactly four bytes
/// long for any variant other than `Script::Unknown`.
pub fn unicode_script_to_iso15924_tag(script: unicode_script::Script) -> u32 {
205+
let bytes: [u8; 4] = match script {
206+
// Special-cased: per the commit description, `short_name()` returns an
// empty string for `Unknown`, while harfbuzz represents it as `Zzzz`.
unicode_script::Script::Unknown => *b"Zzzz",
207+
_ => {
208+
let short_name = script.short_name();
209+
// The slice -> `[u8; 4]` conversion only succeeds for a name of
// exactly four bytes; any other length makes `unwrap()` panic.
short_name.as_bytes().try_into().unwrap()
210+
},
211+
};
212+
213+
// Big-endian packing: the first byte of the tag becomes the most
// significant byte of the returned integer.
u32::from_be_bytes(bytes)
367214
}
368215

369216
impl Shaper {
@@ -381,7 +228,9 @@ impl Shaper {
381228
},
382229
);
383230

384-
hb_buffer_set_script(hb_buffer, unicode_to_hb_script(options.script));
231+
let script =
232+
hb_script_from_iso15924_tag(unicode_script_to_iso15924_tag(options.script));
233+
hb_buffer_set_script(hb_buffer, script);
385234

386235
hb_buffer_add_utf8(
387236
hb_buffer,

0 commit comments

Comments
 (0)