@@ -968,19 +968,18 @@ impl GeneralizedState {
968968 ) -> Vec < GeneralizedPosition > {
969969 let mut successors = Vec :: new ( ) ;
970970 let n = self . max_distance as i32 ;
971- let match_index = ( offset + n) as usize ;
971+ let match_index_i32 = offset + n;
972972
973973 // Phase 3b: Complete split with phonetic validation
974974 // Extract word character that was split
975975 let word_chars: Vec < char > = word_slice. chars ( ) . collect ( ) ;
976976
977- // Phase 3b fix: If word_slice is empty, extract from full_word using absolute position
978- let word_1char = if word_chars. is_empty ( ) {
979- // Subword is empty - we need to use full_word
980- // Calculate absolute word position from offset
977+ // Phase 3b fix: Handle negative match_index or empty word_slice by using full_word
978+ let word_1char = if match_index_i32 < 0 || word_chars. is_empty ( ) {
979+ // Need to use full_word instead of word_slice
981980 let full_word_chars: Vec < char > = full_word. chars ( ) . collect ( ) ;
982- // The splitting state was entered at offset-1, so the word char is at position corresponding to offset
983- // With the corrected offset calculation (using offset not offset+1), we need to find the right position
981+ // Calculate absolute position in full word
982+ // When entering split, we did offset-1, so the word char being split is at offset+n+1
984983 let word_pos = ( offset + n + 1 ) as usize ;
985984
986985 if word_pos < full_word_chars. len ( ) && full_word_chars[ word_pos] != '$' {
@@ -991,6 +990,7 @@ impl GeneralizedState {
991990 }
992991 } else {
993992 // Normal case: extract from subword
993+ let match_index = match_index_i32 as usize ;
994994 if match_index >= word_chars. len ( ) || word_chars[ match_index] == '$' {
995995 return successors;
996996 }
@@ -1037,14 +1037,17 @@ impl GeneralizedState {
10371037
10381038 // FALLBACK: Check standard operations (bit_vector match)
10391039 // Only reached if no phonetic operation applied
1040- if errors > 0 && match_index < bit_vector. len ( ) && bit_vector. is_match ( match_index) {
1041- // Complete split: offset+0 (advance 1 word position), errors-1
1042- if let Ok ( succ) = GeneralizedPosition :: new_i (
1043- offset, // +0 (stays same!)
1044- errors - 1 , // Decrement error (was incremented on enter)
1045- self . max_distance
1046- ) {
1047- successors. push ( succ) ;
1040+ if errors > 0 && match_index_i32 >= 0 {
1041+ let match_idx = match_index_i32 as usize ;
1042+ if match_idx < bit_vector. len ( ) && bit_vector. is_match ( match_idx) {
1043+ // Complete split: offset+0 (advance 1 word position), errors-1
1044+ if let Ok ( succ) = GeneralizedPosition :: new_i (
1045+ offset, // +0 (stays same!)
1046+ errors - 1 , // Decrement error (was incremented on enter)
1047+ self . max_distance
1048+ ) {
1049+ successors. push ( succ) ;
1050+ }
10481051 }
10491052 }
10501053
@@ -1076,18 +1079,18 @@ impl GeneralizedState {
10761079
10771080 // Phase 3b: Complete split with phonetic validation
10781081 // Extract word character that was split
1079- let next_match_index = ( offset + bit_vector. len ( ) as i32 ) as usize ;
1082+ let next_match_index_i32 = offset + bit_vector. len ( ) as i32 ;
10801083 let word_chars: Vec < char > = word_slice. chars ( ) . collect ( ) ;
10811084
1082- // Phase 3b fix: If word_slice is empty, extract from full_word
1083- let word_1char = if word_chars. is_empty ( ) {
1084- // Subword is empty - use full_word to extract character
1085+ // Phase 3b fix: Handle negative or out-of-bounds index by using full_word
1086+ let word_1char = if next_match_index_i32 < 0 || word_chars. is_empty ( ) {
1087+ // Need to use full_word instead of word_slice
10851088 let full_word_chars: Vec < char > = full_word. chars ( ) . collect ( ) ;
10861089
10871090 // For M-type, calculate absolute position
1088- // M-type offset is relative to word end , so word_pos = word_len + offset
1091+ // When entering split, we did offset-1 , so add +1 to get the word char being split
10891092 let word_len = full_word_chars. len ( ) ;
1090- let word_pos = ( word_len as i32 + offset) as usize ;
1093+ let word_pos = ( word_len as i32 + offset + 1 ) as usize ;
10911094
10921095 if word_pos < full_word_chars. len ( ) && full_word_chars[ word_pos] != '$' {
10931096 full_word_chars[ word_pos] . to_string ( )
@@ -1097,7 +1100,8 @@ impl GeneralizedState {
10971100 }
10981101 } else {
10991102 // Normal case: extract from subword
1100- if next_match_index >= word_chars. len ( ) || ( next_match_index < word_chars. len ( ) && word_chars[ next_match_index] == '$' ) {
1103+ let next_match_index = next_match_index_i32 as usize ;
1104+ if next_match_index >= word_chars. len ( ) || word_chars[ next_match_index] == '$' {
11011105 return successors;
11021106 }
11031107 word_chars[ next_match_index] . to_string ( )
0 commit comments