@@ -11,11 +11,12 @@ use std::{
1111 cmp:: { max, min} ,
1212 error:: Error ,
1313 fmt:: { Debug , Display } ,
14+ mem,
1415} ;
1516
1617pub use self :: { abbrev:: AbbrevTable , selection:: symbol:: SymbolSelector } ;
1718pub use estimate:: { LaxUserFreqEstimate , UserFreqEstimate } ;
18- use log:: { debug, info, trace, warn} ;
19+ use log:: { debug, error , info, trace, warn} ;
1920
2021use crate :: {
2122 conversion:: {
@@ -700,40 +701,11 @@ impl SharedState {
700701 self . last_key_behavior = EditorKeyBehavior :: Commit ;
701702 }
702703 fn auto_learn ( & mut self , intervals : & [ Interval ] ) {
703- debug ! ( "intervals {:?}" , intervals) ;
704- let mut pending = String :: new ( ) ;
705- let mut syllables = Vec :: new ( ) ;
706- for interval in intervals {
707- if interval. is_phrase && interval. len ( ) == 1 && !is_break_word ( & interval. str ) {
708- pending. push_str ( & interval. str ) ;
709- syllables. extend_from_slice ( & self . com . symbols ( ) [ interval. start ..interval. end ] ) ;
710- } else {
711- if !pending. is_empty ( ) {
712- debug ! ( "autolearn-2 {:?} as {}" , & syllables, & pending) ;
713- let _ = self . learn_phrase ( & syllables, & pending) ;
714- pending. clear ( ) ;
715- syllables. clear ( ) ;
716- }
717- if interval. is_phrase {
718- debug ! (
719- "autolearn-3 {:?} as {}" ,
720- & self . com. symbols( ) [ interval. start..interval. end] ,
721- & interval. str
722- ) ;
723- // FIXME avoid copy
724- let _ = self . learn_phrase (
725- & self . com . symbols ( ) [ interval. start ..interval. end ] . to_vec ( ) ,
726- & interval. str ,
727- ) ;
728- }
704+ for ( symbols, phrase) in collect_new_phrases ( intervals, self . com . symbols ( ) ) {
705+ if let Err ( error) = self . learn_phrase ( & symbols, & phrase) {
706+ error ! ( "Failed to learn phrase {phrase} from {symbols:?}: {error:#}" ) ;
729707 }
730708 }
731- if !pending. is_empty ( ) {
732- debug ! ( "autolearn-1 {:?} as {}" , & syllables, & pending) ;
733- let _ = self . learn_phrase ( & syllables, & pending) ;
734- pending. clear ( ) ;
735- syllables. clear ( ) ;
736- }
737709 }
738710}
739711
@@ -751,6 +723,50 @@ fn is_break_word(word: &str) -> bool {
751723 "路" , "村" , "在" ] . contains ( & word)
752724}
753725
726+ fn collect_new_phrases ( intervals : & [ Interval ] , symbols : & [ Symbol ] ) -> Vec < ( Vec < Symbol > , String ) > {
727+ debug ! ( "intervals {:?}" , intervals) ;
728+ let mut pending = String :: new ( ) ;
729+ let mut syllables = Vec :: new ( ) ;
730+ let mut phrases = vec ! [ ] ;
731+ let mut collect = |syllables, pending| {
732+ if phrases. iter ( ) . find ( |( _, p) | p == & pending) . is_none ( ) {
733+ debug ! ( "autolearn {:?} as {}" , & syllables, & pending) ;
734+ phrases. push ( ( syllables, pending) )
735+ }
736+ } ;
737+ // Step 1. collect all intervals
738+ for interval in intervals. iter ( ) . filter ( |it| it. is_phrase ) {
739+ let syllables = symbols[ interval. start ..interval. end ] . to_vec ( ) ;
740+ let pending = interval. str . clone ( ) . into_string ( ) ;
741+ collect ( syllables, pending) ;
742+ }
743+ // Step 2. collect all intervals with length one with break words removed
744+ for interval in intervals. iter ( ) {
745+ if interval. is_phrase && interval. len ( ) == 1 && !is_break_word ( & interval. str ) {
746+ pending. push_str ( & interval. str ) ;
747+ syllables. extend_from_slice ( & symbols[ interval. start ..interval. end ] ) ;
748+ } else if !pending. is_empty ( ) {
749+ collect ( mem:: take ( & mut syllables) , mem:: take ( & mut pending) ) ;
750+ }
751+ }
752+ if !pending. is_empty ( ) {
753+ collect ( mem:: take ( & mut syllables) , mem:: take ( & mut pending) ) ;
754+ }
755+ // Step 3. collect all intervals with length one including break words
756+ for interval in intervals {
757+ if interval. is_phrase && interval. len ( ) == 1 {
758+ pending. push_str ( & interval. str ) ;
759+ syllables. extend_from_slice ( & symbols[ interval. start ..interval. end ] ) ;
760+ } else if !pending. is_empty ( ) {
761+ collect ( mem:: take ( & mut syllables) , mem:: take ( & mut pending) ) ;
762+ }
763+ }
764+ if !pending. is_empty ( ) {
765+ collect ( syllables, pending) ;
766+ }
767+ phrases
768+ }
769+
754770impl BasicEditor for Editor {
755771 fn process_keyevent ( & mut self , key_event : KeyboardEvent ) -> EditorKeyBehavior {
756772 debug ! ( "process_keyevent: {:?}" , key_event) ;
@@ -1557,7 +1573,7 @@ mod tests {
15571573 use estimate:: LaxUserFreqEstimate ;
15581574
15591575 use crate :: {
1560- conversion:: ChewingEngine ,
1576+ conversion:: { ChewingEngine , Interval , Symbol } ,
15611577 dictionary:: { Layered , TrieBuf } ,
15621578 editor:: { EditorKeyBehavior , EditorOptions , SymbolSelector , abbrev:: AbbrevTable , estimate} ,
15631579 input:: {
@@ -1566,9 +1582,10 @@ mod tests {
15661582 keysym,
15671583 } ,
15681584 syl,
1569- zhuyin:: Bopomofo ,
1585+ zhuyin:: Bopomofo as bpmf ,
15701586 } ;
15711587
1588+ use super :: collect_new_phrases;
15721589 use super :: { BasicEditor , Editor } ;
15731590
15741591 const CAPSLOCK_EVENT : KeyboardEvent = KeyboardEvent :: builder ( )
@@ -1597,7 +1614,7 @@ mod tests {
15971614 let key_behavior = editor. process_keyevent ( ev) ;
15981615
15991616 assert_eq ! ( EditorKeyBehavior :: Absorb , key_behavior) ;
1600- assert_eq ! ( syl![ Bopomofo :: C ] , editor. syllable_buffer( ) ) ;
1617+ assert_eq ! ( syl![ bpmf :: C ] , editor. syllable_buffer( ) ) ;
16011618
16021619 let ev = KeyboardEvent {
16031620 code : keycode:: KEY_K ,
@@ -1607,13 +1624,13 @@ mod tests {
16071624 let key_behavior = editor. process_keyevent ( ev) ;
16081625
16091626 assert_eq ! ( EditorKeyBehavior :: Absorb , key_behavior) ;
1610- assert_eq ! ( syl![ Bopomofo :: C , Bopomofo :: E ] , editor. syllable_buffer( ) ) ;
1627+ assert_eq ! ( syl![ bpmf :: C , bpmf :: E ] , editor. syllable_buffer( ) ) ;
16111628 }
16121629
16131630 #[ test]
16141631 fn editing_mode_input_bopomofo_commit ( ) {
16151632 let dict = TrieBuf :: from ( [ (
1616- vec ! [ crate :: syl![ Bopomofo :: C , Bopomofo :: E , Bopomofo :: TONE4 ] ] ,
1633+ vec ! [ crate :: syl![ bpmf :: C , bpmf :: E , bpmf :: TONE4 ] ] ,
16171634 vec ! [ ( "冊" , 100 ) ] ,
16181635 ) ] ) ;
16191636 let dict = Layered :: new ( vec ! [ Box :: new( dict) ] , Box :: new ( TrieBuf :: new_in_memory ( ) ) ) ;
@@ -1645,7 +1662,7 @@ mod tests {
16451662 #[ test]
16461663 fn editing_mode_input_bopomofo_select ( ) {
16471664 let dict = TrieBuf :: from ( [ (
1648- vec ! [ crate :: syl![ Bopomofo :: C , Bopomofo :: E , Bopomofo :: TONE4 ] ] ,
1665+ vec ! [ crate :: syl![ bpmf :: C , bpmf :: E , bpmf :: TONE4 ] ] ,
16491666 vec ! [ ( "冊" , 100 ) , ( "測" , 200 ) ] ,
16501667 ) ] ) ;
16511668 let dict = Layered :: new ( vec ! [ Box :: new( dict) ] , Box :: new ( TrieBuf :: new_in_memory ( ) ) ) ;
@@ -1693,7 +1710,7 @@ mod tests {
16931710 #[ test]
16941711 fn editing_mode_input_bopomofo_select_sorted ( ) {
16951712 let dict = TrieBuf :: from ( [ (
1696- vec ! [ crate :: syl![ Bopomofo :: C , Bopomofo :: E , Bopomofo :: TONE4 ] ] ,
1713+ vec ! [ crate :: syl![ bpmf :: C , bpmf :: E , bpmf :: TONE4 ] ] ,
16971714 vec ! [ ( "冊" , 100 ) , ( "測" , 200 ) ] ,
16981715 ) ] ) ;
16991716 let dict = Layered :: new ( vec ! [ Box :: new( dict) ] , Box :: new ( TrieBuf :: new_in_memory ( ) ) ) ;
@@ -1741,7 +1758,7 @@ mod tests {
17411758 #[ test]
17421759 fn editing_mode_input_chinese_to_english_mode ( ) {
17431760 let dict = TrieBuf :: from ( [ (
1744- vec ! [ crate :: syl![ Bopomofo :: C , Bopomofo :: E , Bopomofo :: TONE4 ] ] ,
1761+ vec ! [ crate :: syl![ bpmf :: C , bpmf :: E , bpmf :: TONE4 ] ] ,
17451762 vec ! [ ( "冊" , 100 ) ] ,
17461763 ) ] ) ;
17471764 let dict = Layered :: new ( vec ! [ Box :: new( dict) ] , Box :: new ( TrieBuf :: new_in_memory ( ) ) ) ;
@@ -1782,7 +1799,7 @@ mod tests {
17821799 #[ test]
17831800 fn editing_mode_input_english_to_chinese_mode ( ) {
17841801 let dict = TrieBuf :: from ( [ (
1785- vec ! [ crate :: syl![ Bopomofo :: C , Bopomofo :: E , Bopomofo :: TONE4 ] ] ,
1802+ vec ! [ crate :: syl![ bpmf :: C , bpmf :: E , bpmf :: TONE4 ] ] ,
17861803 vec ! [ ( "冊" , 100 ) ] ,
17871804 ) ] ) ;
17881805 let dict = Layered :: new ( vec ! [ Box :: new( dict) ] , Box :: new ( TrieBuf :: new_in_memory ( ) ) ) ;
@@ -1839,7 +1856,7 @@ mod tests {
18391856 #[ test]
18401857 fn editing_chinese_mode_input_special_symbol ( ) {
18411858 let dict = TrieBuf :: from ( [ (
1842- vec ! [ crate :: syl![ Bopomofo :: C , Bopomofo :: E , Bopomofo :: TONE4 ] ] ,
1859+ vec ! [ crate :: syl![ bpmf :: C , bpmf :: E , bpmf :: TONE4 ] ] ,
18431860 vec ! [ ( "冊" , 100 ) ] ,
18441861 ) ] ) ;
18451862 let dict = Layered :: new ( vec ! [ Box :: new( dict) ] , Box :: new ( TrieBuf :: new_in_memory ( ) ) ) ;
@@ -1934,4 +1951,138 @@ mod tests {
19341951 assert_eq ! ( EditorKeyBehavior :: Bell , key_behavior) ;
19351952 assert_eq ! ( syl![ ] , editor. syllable_buffer( ) ) ;
19361953 }
1954+
// Three adjacent two-syllable phrases: each one is expected exactly once, in
// input order, and nothing else is expected to be produced.
#[test]
fn collect_new_phrases_with_no_break_word() {
    // Builds a phrase interval covering `start..end` of the symbol buffer.
    let phrase = |start: usize, end: usize, text: &str| Interval {
        start,
        end,
        is_phrase: true,
        str: text.into(),
    };
    let jin = Symbol::Syllable(syl![bpmf::J, bpmf::I, bpmf::EN]);
    let tian = Symbol::Syllable(syl![bpmf::T, bpmf::I, bpmf::AN]);
    let qi = Symbol::Syllable(syl![bpmf::Q, bpmf::I, bpmf::TONE4]);
    let zhen = Symbol::Syllable(syl![bpmf::ZH, bpmf::EN]);
    let hao = Symbol::Syllable(syl![bpmf::H, bpmf::AU, bpmf::TONE3]);

    let intervals = [
        phrase(0, 2, "今天"),
        phrase(2, 4, "天氣"),
        phrase(4, 6, "真好"),
    ];
    let symbols = [
        jin.clone(),
        tian.clone(),
        tian.clone(),
        qi.clone(),
        zhen.clone(),
        hao.clone(),
    ];

    let expected = vec![
        (vec![jin, tian.clone()], "今天".to_string()),
        (vec![tian, qi], "天氣".to_string()),
        (vec![zhen, hao], "真好".to_string()),
    ];
    let phrases = collect_new_phrases(&intervals, &symbols);
    assert_eq!(expected, phrases);
}
2013+
// Two single-syllable intervals ("也", "是") sit between longer phrases. The
// expected result lists every interval once, followed by the joined "也是".
// Presumably both are break words, so only the pass that allows break words
// joins them — verify against `is_break_word`.
#[test]
fn collect_new_phrases_with_break_word() {
    // Builds a phrase interval covering `start..end` of the symbol buffer.
    let phrase = |start: usize, end: usize, text: &str| Interval {
        start,
        end,
        is_phrase: true,
        str: text.into(),
    };
    let jin = Symbol::Syllable(syl![bpmf::J, bpmf::I, bpmf::EN]);
    let tian = Symbol::Syllable(syl![bpmf::T, bpmf::I, bpmf::AN]);
    let ye = Symbol::Syllable(syl![bpmf::I, bpmf::EH, bpmf::TONE3]);
    let shi = Symbol::Syllable(syl![bpmf::SH, bpmf::TONE4]);
    let hao = Symbol::Syllable(syl![bpmf::H, bpmf::AU, bpmf::TONE3]);
    let qi = Symbol::Syllable(syl![bpmf::Q, bpmf::I, bpmf::TONE4]);

    let intervals = [
        phrase(0, 2, "今天"),
        phrase(2, 3, "也"),
        phrase(3, 4, "是"),
        phrase(4, 7, "好天氣"),
    ];
    let symbols = [
        jin.clone(),
        tian.clone(),
        ye.clone(),
        shi.clone(),
        hao.clone(),
        tian.clone(),
        qi.clone(),
    ];

    let expected = vec![
        (vec![jin, tian.clone()], "今天".to_string()),
        (vec![ye.clone()], "也".to_string()),
        (vec![shi.clone()], "是".to_string()),
        (vec![hao, tian, qi], "好天氣".to_string()),
        (vec![ye, shi], "也是".to_string()),
    ];
    let phrases = collect_new_phrases(&intervals, &symbols);
    assert_eq!(expected, phrases);
}
19372088}
0 commit comments