Skip to content

Commit ca36d8c

Browse files
authored
Merge pull request #191 from pshipton/harfbuzz17
Port harfbuzz - Avoid O(n^2) behavior in mark-attachment
2 parents 7c20b28 + 5ca81bc commit ca36d8c

File tree

3 files changed

+152
-88
lines changed

3 files changed

+152
-88
lines changed

src/java.desktop/share/native/libharfbuzz/OT/Layout/GPOS/MarkBasePosFormat1.hh

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -89,54 +89,78 @@ struct MarkBasePosFormat1
8989

9090
const Coverage &get_coverage () const { return this+markCoverage; }
9191

92+
static inline bool accept (hb_buffer_t *buffer, unsigned idx)
93+
{
94+
/* We only want to attach to the first of a MultipleSubst sequence.
95+
* https://github.com/harfbuzz/harfbuzz/issues/740
96+
* Reject others...
97+
* ...but stop if we find a mark in the MultipleSubst sequence:
98+
* https://github.com/harfbuzz/harfbuzz/issues/1020 */
99+
return !_hb_glyph_info_multiplied (&buffer->info[idx]) ||
100+
0 == _hb_glyph_info_get_lig_comp (&buffer->info[idx]) ||
101+
(idx == 0 ||
102+
_hb_glyph_info_is_mark (&buffer->info[idx - 1]) ||
103+
!_hb_glyph_info_multiplied (&buffer->info[idx - 1]) ||
104+
_hb_glyph_info_get_lig_id (&buffer->info[idx]) !=
105+
_hb_glyph_info_get_lig_id (&buffer->info[idx - 1]) ||
106+
_hb_glyph_info_get_lig_comp (&buffer->info[idx]) !=
107+
_hb_glyph_info_get_lig_comp (&buffer->info[idx - 1]) + 1
108+
);
109+
}
110+
92111
bool apply (hb_ot_apply_context_t *c) const
93112
{
94113
TRACE_APPLY (this);
95114
hb_buffer_t *buffer = c->buffer;
96115
unsigned int mark_index = (this+markCoverage).get_coverage (buffer->cur().codepoint);
97116
if (likely (mark_index == NOT_COVERED)) return_trace (false);
98117

99-
/* Now we search backwards for a non-mark glyph */
118+
/* Now we search backwards for a non-mark glyph.
119+
* We don't use skippy_iter.prev() to avoid O(n^2) behavior. */
120+
100121
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
101-
skippy_iter.reset (buffer->idx, 1);
102122
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
103-
do {
104-
unsigned unsafe_from;
105-
if (!skippy_iter.prev (&unsafe_from))
123+
124+
if (c->last_base_until > buffer->idx)
125+
{
126+
c->last_base_until = 0;
127+
c->last_base = -1;
128+
}
129+
unsigned j;
130+
for (j = buffer->idx; j > c->last_base_until; j--)
131+
{
132+
auto match = skippy_iter.match (buffer->info[j - 1]);
133+
if (match == skippy_iter.MATCH)
134+
{
135+
if (!accept (buffer, j - 1))
136+
match = skippy_iter.SKIP;
137+
}
138+
if (match == skippy_iter.MATCH)
106139
{
107-
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
108-
return_trace (false);
140+
c->last_base = (signed) j - 1;
141+
break;
109142
}
143+
}
144+
c->last_base_until = buffer->idx;
145+
if (c->last_base == -1)
146+
{
147+
buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
148+
return_trace (false);
149+
}
110150

111-
/* We only want to attach to the first of a MultipleSubst sequence.
112-
* https://github.com/harfbuzz/harfbuzz/issues/740
113-
* Reject others...
114-
* ...but stop if we find a mark in the MultipleSubst sequence:
115-
* https://github.com/harfbuzz/harfbuzz/issues/1020 */
116-
if (!_hb_glyph_info_multiplied (&buffer->info[skippy_iter.idx]) ||
117-
0 == _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) ||
118-
(skippy_iter.idx == 0 ||
119-
_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx - 1]) ||
120-
_hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]) !=
121-
_hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx - 1]) ||
122-
_hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) !=
123-
_hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx - 1]) + 1
124-
))
125-
break;
126-
skippy_iter.reject ();
127-
} while (true);
151+
unsigned idx = (unsigned) c->last_base;
128152

129153
/* Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled */
130-
//if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); }
154+
//if (!_hb_glyph_info_is_base_glyph (&buffer->info[idx])) { return_trace (false); }
131155

132-
unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint);
156+
unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[idx].codepoint);
133157
if (base_index == NOT_COVERED)
134158
{
135-
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
159+
buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
136160
return_trace (false);
137161
}
138162

139-
return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx));
163+
return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, idx));
140164
}
141165

142166
bool subset (hb_subset_context_t *c) const

src/java.desktop/share/native/libharfbuzz/OT/Layout/GPOS/MarkLigPosFormat1.hh

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -138,24 +138,41 @@ struct MarkLigPosFormat1
138138
if (likely (mark_index == NOT_COVERED)) return_trace (false);
139139

140140
/* Now we search backwards for a non-mark glyph */
141+
141142
hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
142-
skippy_iter.reset (buffer->idx, 1);
143143
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
144-
unsigned unsafe_from;
145-
if (!skippy_iter.prev (&unsafe_from))
144+
145+
if (c->last_base_until > buffer->idx)
146146
{
147-
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
147+
c->last_base_until = 0;
148+
c->last_base = -1;
149+
}
150+
unsigned j;
151+
for (j = buffer->idx; j > c->last_base_until; j--)
152+
{
153+
auto match = skippy_iter.match (buffer->info[j - 1]);
154+
if (match == skippy_iter.MATCH)
155+
{
156+
c->last_base = (signed) j - 1;
157+
break;
158+
}
159+
}
160+
c->last_base_until = buffer->idx;
161+
if (c->last_base == -1)
162+
{
163+
buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
148164
return_trace (false);
149165
}
150166

167+
unsigned idx = (unsigned) c->last_base;
168+
151169
/* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */
152-
//if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); }
170+
//if (!_hb_glyph_info_is_ligature (&buffer->info[idx])) { return_trace (false); }
153171

154-
unsigned int j = skippy_iter.idx;
155-
unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint);
172+
unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[idx].codepoint);
156173
if (lig_index == NOT_COVERED)
157174
{
158-
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
175+
buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
159176
return_trace (false);
160177
}
161178

@@ -166,7 +183,7 @@ struct MarkLigPosFormat1
166183
unsigned int comp_count = lig_attach.rows;
167184
if (unlikely (!comp_count))
168185
{
169-
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
186+
buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
170187
return_trace (false);
171188
}
172189

@@ -175,15 +192,15 @@ struct MarkLigPosFormat1
175192
* can directly use the component index. If not, we attach the mark
176193
* glyph to the last component of the ligature. */
177194
unsigned int comp_index;
178-
unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[j]);
195+
unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[idx]);
179196
unsigned int mark_id = _hb_glyph_info_get_lig_id (&buffer->cur());
180197
unsigned int mark_comp = _hb_glyph_info_get_lig_comp (&buffer->cur());
181198
if (lig_id && lig_id == mark_id && mark_comp > 0)
182199
comp_index = hb_min (comp_count, _hb_glyph_info_get_lig_comp (&buffer->cur())) - 1;
183200
else
184201
comp_index = comp_count - 1;
185202

186-
return_trace ((this+markArray).apply (c, mark_index, comp_index, lig_attach, classCount, j));
203+
return_trace ((this+markArray).apply (c, mark_index, comp_index, lig_attach, classCount, idx));
187204
}
188205

189206
bool subset (hb_subset_context_t *c) const

src/java.desktop/share/native/libharfbuzz/hb-ot-layout-gsubgpos.hh

Lines changed: 72 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -517,34 +517,56 @@ struct hb_ot_apply_context_t :
517517
may_skip (const hb_glyph_info_t &info) const
518518
{ return matcher.may_skip (c, info); }
519519

520-
bool next (unsigned *unsafe_to = nullptr)
520+
enum match_t {
521+
MATCH,
522+
NOT_MATCH,
523+
SKIP
524+
};
525+
526+
match_t match (hb_glyph_info_t &info)
521527
{
522-
assert (num_items > 0);
523-
while (idx + num_items < end)
524-
{
525-
idx++;
526-
hb_glyph_info_t &info = c->buffer->info[idx];
528+
matcher_t::may_skip_t skip = matcher.may_skip (c, info);
529+
if (unlikely (skip == matcher_t::SKIP_YES))
530+
return SKIP;
527531

528-
matcher_t::may_skip_t skip = matcher.may_skip (c, info);
529-
if (unlikely (skip == matcher_t::SKIP_YES))
530-
continue;
532+
matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
533+
if (match == matcher_t::MATCH_YES ||
534+
(match == matcher_t::MATCH_MAYBE &&
535+
skip == matcher_t::SKIP_NO))
536+
return MATCH;
531537

532-
matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
533-
if (match == matcher_t::MATCH_YES ||
534-
(match == matcher_t::MATCH_MAYBE &&
535-
skip == matcher_t::SKIP_NO))
536-
{
537-
num_items--;
538-
if (match_glyph_data) match_glyph_data++;
539-
return true;
540-
}
538+
if (skip == matcher_t::SKIP_NO)
539+
return NOT_MATCH;
541540

542-
if (skip == matcher_t::SKIP_NO)
543-
{
544-
if (unsafe_to)
545-
*unsafe_to = idx + 1;
546-
return false;
547-
}
541+
return SKIP;
542+
}
543+
544+
bool next (unsigned *unsafe_to = nullptr)
545+
{
546+
assert (num_items > 0);
547+
/* The alternate condition below is faster at string boundaries,
548+
* but produces subpar "unsafe-to-concat" values. */
549+
signed stop = (signed) end - (signed) num_items;
550+
while ((signed) idx < stop)
551+
{
552+
idx++;
553+
switch (match (c->buffer->info[idx]))
554+
{
555+
case MATCH:
556+
{
557+
num_items--;
558+
if (match_glyph_data) match_glyph_data++;
559+
return true;
560+
}
561+
case NOT_MATCH:
562+
{
563+
if (unsafe_to)
564+
*unsafe_to = idx + 1;
565+
return false;
566+
}
567+
case SKIP:
568+
continue;
569+
}
548570
}
549571
if (unsafe_to)
550572
*unsafe_to = end;
@@ -553,31 +575,29 @@ struct hb_ot_apply_context_t :
553575
bool prev (unsigned *unsafe_from = nullptr)
554576
{
555577
assert (num_items > 0);
556-
while (idx > num_items - 1)
578+
/* The alternate condition below is faster at string boundaries,
579+
* but produces subpar "unsafe-to-concat" values. */
580+
unsigned stop = num_items - 1;
581+
while (idx > stop)
557582
{
558-
idx--;
559-
hb_glyph_info_t &info = c->buffer->out_info[idx];
560-
561-
matcher_t::may_skip_t skip = matcher.may_skip (c, info);
562-
if (unlikely (skip == matcher_t::SKIP_YES))
563-
continue;
564-
565-
matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
566-
if (match == matcher_t::MATCH_YES ||
567-
(match == matcher_t::MATCH_MAYBE &&
568-
skip == matcher_t::SKIP_NO))
569-
{
570-
num_items--;
571-
if (match_glyph_data) match_glyph_data++;
572-
return true;
573-
}
574-
575-
if (skip == matcher_t::SKIP_NO)
576-
{
577-
if (unsafe_from)
578-
*unsafe_from = hb_max (1u, idx) - 1u;
579-
return false;
580-
}
583+
idx--;
584+
switch (match (c->buffer->out_info[idx]))
585+
{
586+
case MATCH:
587+
{
588+
num_items--;
589+
if (match_glyph_data) match_glyph_data++;
590+
return true;
591+
}
592+
case NOT_MATCH:
593+
{
594+
if (unsafe_from)
595+
*unsafe_from = hb_max (1u, idx) - 1u;
596+
return false;
597+
}
598+
case SKIP:
599+
continue;
600+
}
581601
}
582602
if (unsafe_from)
583603
*unsafe_from = 0;
@@ -640,6 +660,9 @@ struct hb_ot_apply_context_t :
640660
uint32_t random_state = 1;
641661
unsigned new_syllables = (unsigned) -1;
642662

663+
signed last_base = -1; // GPOS uses
664+
unsigned last_base_until = 0; // GPOS uses
665+
643666
hb_ot_apply_context_t (unsigned int table_index_,
644667
hb_font_t *font_,
645668
hb_buffer_t *buffer_) :
@@ -677,7 +700,7 @@ struct hb_ot_apply_context_t :
677700
iter_context.init (this, true);
678701
}
679702

680-
void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; init_iters (); }
703+
void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; last_base = -1; last_base_until = 0; init_iters (); }
681704
void set_auto_zwj (bool auto_zwj_) { auto_zwj = auto_zwj_; init_iters (); }
682705
void set_auto_zwnj (bool auto_zwnj_) { auto_zwnj = auto_zwnj_; init_iters (); }
683706
void set_per_syllable (bool per_syllable_) { per_syllable = per_syllable_; init_iters (); }

0 commit comments

Comments
 (0)