Skip to content

Commit 9790e56

Browse files
committed
conversion: simplify phrase probability calculation
1 parent 9eb9350 commit 9790e56

File tree

3 files changed

+16
-29
lines changed

3 files changed

+16
-29
lines changed

src/conversion/chewing.rs

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pub struct ChewingEngine {
2020
}
2121

2222
impl ChewingEngine {
23-
const MAX_OUT_PATHS: usize = 300;
23+
const MAX_OUT_PATHS: usize = 100;
2424
/// Creates a new conversion engine.
2525
pub fn new() -> ChewingEngine {
2626
ChewingEngine {
@@ -169,7 +169,18 @@ impl ChewingEngine {
169169
true
170170
})
171171
.map(|phrase| {
172-
let log_prob = (phrase.freq().clamp(1, 9999999) as f64 / global_total).ln();
172+
let log_phrase_prob = (phrase.freq().clamp(1, 9999999) as f64 / global_total).ln();
173+
let log_length_prob: f64 = match syllables.len() {
174+
// log probability of phrase length calculated from tsi.src
175+
1 => -1.520439227173415,
176+
2 => -0.4236568120124837,
177+
3 => -1.455835986003893,
178+
4 => -1.6178072894679227,
179+
5 => -4.425765184802149,
180+
_ => -4.787357595622411,
181+
};
182+
let log_prob = log_phrase_prob + log_length_prob;
183+
debug_assert!(log_prob.is_normal());
173184
PossiblePhrase::Phrase(phrase, log_prob)
174185
})
175186
.collect::<Vec<_>>();
@@ -427,12 +438,6 @@ impl Debug for PossibleInterval {
427438
}
428439
}
429440

430-
impl PossibleInterval {
431-
fn len(&self) -> usize {
432-
self.end - self.start
433-
}
434-
}
435-
436441
impl From<PossibleInterval> for Interval {
437442
fn from(value: PossibleInterval) -> Self {
438443
Interval {
@@ -455,8 +460,6 @@ struct PossiblePath {
455460
impl Debug for PossiblePath {
456461
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
457462
f.debug_struct("PossiblePath")
458-
.field("phrase_log_probability()", &self.phrase_log_probability())
459-
.field("length_log_probability()", &self.length_log_probability())
460463
.field("total_probability()", &self.total_probability())
461464
.field("intervals", &self.intervals)
462465
.finish()
@@ -465,27 +468,13 @@ impl Debug for PossiblePath {
465468

466469
impl PossiblePath {
467470
fn total_probability(&self) -> f64 {
468-
let prob = self.phrase_log_probability() + self.length_log_probability();
471+
let prob = self.phrase_log_probability();
469472
debug_assert!(!prob.is_nan());
470473
prob
471474
}
472475
fn phrase_log_probability(&self) -> f64 {
473476
self.intervals.iter().map(|it| it.phrase.log_prob()).sum()
474477
}
475-
fn length_log_probability(&self) -> f64 {
476-
self.intervals
477-
.iter()
478-
.map(|it| match it.len() {
479-
// log probability of phrase length calculated from tsi.src
480-
1 => -1.520439227173415,
481-
2 => -0.4236568120124837,
482-
3 => -1.455835986003893,
483-
4 => -1.6178072894679227,
484-
5 => -4.425765184802149,
485-
_ => -4.787357595622411,
486-
})
487-
.sum()
488-
}
489478
}
490479

491480
impl PartialEq for PossiblePath {

src/conversion/simple.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::iter;
2-
31
use crate::{
42
conversion::Outcome,
53
dictionary::{Dictionary, LookupStrategy},

tests/test-bopomofo.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -994,10 +994,10 @@ void test_Tab_at_the_end()
994994
ok_preedit_buffer(ctx, "測試一下");
995995

996996
type_keystroke_by_string(ctx, "<T>");
997-
ok_preedit_buffer(ctx, "測試儀下");
997+
ok_preedit_buffer(ctx, "策試一下");
998998

999999
type_keystroke_by_string(ctx, "<T>");
1000-
ok_preedit_buffer(ctx, "測試遺下");
1000+
ok_preedit_buffer(ctx, "測試儀下");
10011001

10021002
chewing_delete(ctx);
10031003
}

0 commit comments

Comments
 (0)