@@ -49,66 +49,125 @@ export class PatternMatcher {
4949 }
5050
5151 /**
52- * Expand synonyms in pattern
52+ * Check if a word matches a pattern part (handles @synonyms)
5353 */
54- expandSynonyms ( pattern , synonyms ) {
55- let expanded = pattern ;
56- const regex = / @ ( \w + ) / g;
57- let match ;
54+ matchWord ( word , patternPart , synonyms ) {
55+ word = word . toLowerCase ( ) ;
5856
59- while ( ( match = regex . exec ( pattern ) ) !== null ) {
60- const synKey = match [ 1 ] ;
57+ if ( patternPart === '*' ) {
58+ return 'wildcard' ;
59+ }
60+
61+ if ( patternPart . startsWith ( '@' ) ) {
62+ // Synonym match
63+ const synKey = patternPart . substring ( 1 ) ;
6164 if ( synonyms [ synKey ] ) {
62- const alternatives = synonyms [ synKey ] . join ( '|' ) ;
63- expanded = expanded . replace ( match [ 0 ] , `(${ alternatives } )` ) ;
65+ // Check if word matches the synonym group name or any synonym
66+ if ( word === synKey || synonyms [ synKey ] . includes ( word ) ) {
67+ return 'synonym' ;
68+ }
6469 }
70+ return false ;
71+ }
72+
73+ // Exact match
74+ if ( word === patternPart . toLowerCase ( ) ) {
75+ return 'exact' ;
6576 }
6677
67- return expanded ;
78+ return false ;
6879 }
6980
7081 /**
71- * Convert ELIZA pattern to regex
82+ * Word-based pattern matching (like Python implementation)
83+ * Pattern parts: ['*', 'i', 'am', '*', '@sad', '*']
84+ * Input words: ['i', 'am', 'unhappy']
85+ * Returns: { matched: true, captures: [[], [], ['unhappy'], []] }
7286 */
73- patternToRegex ( pattern , synonyms ) {
74- // Escape special regex characters first (before synonym expansion)
75- // Note: We escape . + ^ $ { } [ ] \ but NOT * () | ? :
76- // * and () are used in ELIZA patterns, | ? : are used in synonym groups
77- let regexPattern = pattern . replace ( / [ . + ^ $ { } [ \] \\ ] / g, '\\$&' ) ;
87+ matchPattern ( input , pattern , synonyms ) {
88+ // Split input into words, stripping punctuation
89+ const inputWords = input . toLowerCase ( )
90+ . replace ( / [ . , ! ? ; : ] / g, ' ' )
91+ . split ( / \s + / )
92+ . filter ( w => w . length > 0 ) ;
7893
79- // Expand synonyms after escaping to preserve (?:...) syntax
80- regexPattern = this . expandSynonyms ( regexPattern , synonyms ) ;
94+ // Split pattern into parts
95+ const patternParts = pattern . toLowerCase ( )
96+ . split ( / \s + / )
97+ . filter ( p => p . length > 0 ) ;
8198
82- // Convert ELIZA wildcards to regex
83- // * matches any sequence of words (non-greedy to match minimal text)
84- regexPattern = regexPattern . replace ( / \* / g, '(.*?)' ) ;
99+ // Use recursive matching
100+ const result = this . _matchHelper ( inputWords , patternParts , synonyms ) ;
85101
86- // Add anchors
87- regexPattern = '^' + regexPattern + '$' ;
102+ if ( result . matched ) {
103+ return {
104+ matched : true ,
105+ captures : result . captures ,
106+ pattern,
107+ patternParts
108+ } ;
109+ }
88110
89- return new RegExp ( regexPattern , 'i' ) ;
111+ return { matched : false , pattern , patternParts } ;
90112 }
91113
92114 /**
93- * Match input against pattern and extract components
115+ * Recursive helper for word-based pattern matching
94116 */
95- matchPattern ( input , pattern , synonyms ) {
96- const regex = this . patternToRegex ( pattern , synonyms ) ;
97- // Pad input with spaces to handle patterns starting/ending with *
98- const paddedInput = ' ' + input + ' ' ;
99- const match = paddedInput . match ( regex ) ;
117+ _matchHelper ( words , parts , synonyms ) {
118+ // Base case: no more pattern parts
119+ if ( parts . length === 0 ) {
120+ // Match only if no more words either
121+ return { matched : words . length === 0 , captures : [ ] } ;
122+ }
100123
101- if ( match ) {
102- const captures = match . slice ( 1 ) . map ( g => ( g || '' ) . trim ( ) ) ;
103- return {
104- matched : true ,
105- captures,
106- pattern,
107- regex : regex . toString ( )
108- } ;
124+ const [ currentPart , ...remainingParts ] = parts ;
125+
126+ if ( currentPart === '*' ) {
127+ // Wildcard: try matching 0, 1, 2, ... words
128+ // Try shortest match first (greedy would cause issues)
129+ for ( let i = 0 ; i <= words . length ; i ++ ) {
130+ const capturedWords = words . slice ( 0 , i ) ;
131+ const remainingWords = words . slice ( i ) ;
132+
133+ const result = this . _matchHelper ( remainingWords , remainingParts , synonyms ) ;
134+ if ( result . matched ) {
135+ return {
136+ matched : true ,
137+ captures : [ capturedWords , ...result . captures ]
138+ } ;
139+ }
140+ }
141+ return { matched : false , captures : [ ] } ;
142+ }
143+
144+ // Non-wildcard: must match the first word
145+ if ( words . length === 0 ) {
146+ return { matched : false , captures : [ ] } ;
147+ }
148+
149+ const [ currentWord , ...remainingWords ] = words ;
150+ const matchType = this . matchWord ( currentWord , currentPart , synonyms ) ;
151+
152+ if ( matchType ) {
153+ const result = this . _matchHelper ( remainingWords , remainingParts , synonyms ) ;
154+ if ( result . matched ) {
155+ // For synonym matches, capture the matched word
156+ if ( matchType === 'synonym' ) {
157+ return {
158+ matched : true ,
159+ captures : [ [ currentWord ] , ...result . captures ]
160+ } ;
161+ }
162+ // For exact matches, don't capture
163+ return {
164+ matched : true ,
165+ captures : result . captures
166+ } ;
167+ }
109168 }
110169
111- return { matched : false , pattern , regex : regex . toString ( ) } ;
170+ return { matched : false , captures : [ ] } ;
112171 }
113172
114173 /**
@@ -120,22 +179,37 @@ export class PatternMatcher {
120179 . replace ( / [ . , ! ? ; : ] / g, ' ' )
121180 . split ( / \s + / )
122181 . filter ( w => w . length > 0 ) ;
123- const matchedRules = [ ] ;
124182
125- // Find all rules with keywords present in input
183+ // Build a map of keyword -> rule for quick lookup
184+ const rulesByKeyword = new Map ( ) ;
126185 for ( const rule of rules ) {
127- const keyword = rule . keyword . toLowerCase ( ) ;
128- if ( words . includes ( keyword ) ) {
186+ rulesByKeyword . set ( rule . keyword . toLowerCase ( ) , rule ) ;
187+ }
188+
189+ // Find keywords by iterating through INPUT words (like Python implementation)
190+ // This ensures keywords are found in the order they appear in the input
191+ const matchedRules = [ ] ;
192+ const seenKeywords = new Set ( ) ;
193+
194+ for ( const word of words ) {
195+ const rule = rulesByKeyword . get ( word ) ;
196+ if ( rule && ! seenKeywords . has ( word ) ) {
129197 matchedRules . push ( rule ) ;
198+ seenKeywords . add ( word ) ;
130199 }
131200 }
132201
133202 // Sort by rank (higher rank = higher priority)
203+ // Use stable sort so equal ranks preserve input word order
134204 matchedRules . sort ( ( a , b ) => ( b . rank || 0 ) - ( a . rank || 0 ) ) ;
135205
136- // Try to match patterns for each rule
206+ // Try to match patterns for each rule (in rank order)
207+ // For each rule, try specific patterns first, then catch-all
137208 for ( const rule of matchedRules ) {
209+ // First try specific patterns
138210 for ( const patternObj of rule . patterns ) {
211+ if ( patternObj . pattern === '*' ) continue ;
212+
139213 const matchResult = this . matchPattern ( input , patternObj . pattern , synonyms ) ;
140214
141215 if ( matchResult . matched ) {
@@ -147,9 +221,24 @@ export class PatternMatcher {
147221 } ;
148222 }
149223 }
224+
225+ // Then try catch-all for this rule
226+ for ( const patternObj of rule . patterns ) {
227+ if ( patternObj . pattern === '*' ) {
228+ const matchResult = this . matchPattern ( input , patternObj . pattern , synonyms ) ;
229+ if ( matchResult . matched ) {
230+ return {
231+ rule,
232+ pattern : patternObj ,
233+ matchResult,
234+ allTestedRules : matchedRules
235+ } ;
236+ }
237+ }
238+ }
150239 }
151240
152- // No keyword matched, try catch-all patterns
241+ // No keyword matched, try catch-all patterns from any rule
153242 for ( const rule of rules ) {
154243 for ( const patternObj of rule . patterns ) {
155244 if ( patternObj . pattern === '*' ) {
@@ -169,6 +258,7 @@ export class PatternMatcher {
169258
170259 /**
171260 * Assemble response from template and captures
261+ * Captures are now arrays of words (from word-based matching)
172262 */
173263 assembleResponse ( template , captures , postSubstitutions ) {
174264 let response = template ;
@@ -178,8 +268,13 @@ export class PatternMatcher {
178268 for ( let i = 0 ; i < captures . length ; i ++ ) {
179269 const placeholder = `(${ i + 1 } )` ;
180270 if ( response . includes ( placeholder ) ) {
271+ // Convert capture (array of words) to string
272+ const captureText = Array . isArray ( captures [ i ] )
273+ ? captures [ i ] . join ( ' ' )
274+ : captures [ i ] ;
275+
181276 // Apply post-substitutions to captured text
182- const { result } = this . applyPostSubstitutions ( captures [ i ] , postSubstitutions ) ;
277+ const { result } = this . applyPostSubstitutions ( captureText , postSubstitutions ) ;
183278
184279 // Use a temporary marker to preserve the text that should stay lowercase
185280 const marker = `__CAPTURE_${ i } __` ;
@@ -214,21 +309,28 @@ export class PatternMatcher {
214309
215310 const processedInput = preSubResult . result ;
216311
217- // Step 2: Keyword detection
312+ // Step 2: Keyword detection (using input word order)
218313 // Split and clean words, removing punctuation
219314 const words = processedInput . toLowerCase ( )
220315 . replace ( / [ . , ! ? ; : ] / g, ' ' )
221316 . split ( / \s + / )
222317 . filter ( w => w . length > 0 ) ;
223- const keywordsFound = [ ] ;
224- const keywordsNotFound = [ ] ;
225318
319+ // Build a map of keyword -> rule for quick lookup
320+ const rulesByKeyword = new Map ( ) ;
226321 for ( const rule of rules ) {
227- const keyword = rule . keyword . toLowerCase ( ) ;
228- if ( words . includes ( keyword ) ) {
229- keywordsFound . push ( { keyword, rank : rule . rank || 0 , rule } ) ;
230- } else {
231- keywordsNotFound . push ( keyword ) ;
322+ rulesByKeyword . set ( rule . keyword . toLowerCase ( ) , rule ) ;
323+ }
324+
325+ // Find keywords by iterating through INPUT words
326+ const keywordsFound = [ ] ;
327+ const seenKeywords = new Set ( ) ;
328+
329+ for ( const word of words ) {
330+ const rule = rulesByKeyword . get ( word ) ;
331+ if ( rule && ! seenKeywords . has ( word ) ) {
332+ keywordsFound . push ( { keyword : rule . keyword . toLowerCase ( ) , rank : rule . rank || 0 , rule } ) ;
333+ seenKeywords . add ( word ) ;
232334 }
233335 }
234336
@@ -242,21 +344,41 @@ export class PatternMatcher {
242344 details : `Found ${ keywordsFound . length } keyword(s), testing in priority order`
243345 } ) ;
244346
245- // Step 3: Pattern matching
347+ // Step 3: Pattern matching (specific patterns first, then catch-all)
246348 const patternTests = [ ] ;
247349 let matchedRule = null ;
248350 let matchedPattern = null ;
249351 let matchResult = null ;
250352
251- // Test each keyword's patterns
353+ // First pass: try specific patterns (not catch-all '*')
252354 for ( const { keyword, rule } of keywordsFound ) {
253355 for ( const patternObj of rule . patterns ) {
356+ if ( patternObj . pattern === '*' ) continue ;
357+
358+ const result = this . matchPattern ( processedInput , patternObj . pattern , synonyms ) ;
359+ patternTests . push ( {
360+ keyword,
361+ pattern : patternObj . pattern ,
362+ matched : result . matched ,
363+ captures : result . captures
364+ } ) ;
365+
366+ if ( result . matched && ! matchedRule ) {
367+ matchedRule = rule ;
368+ matchedPattern = patternObj ;
369+ matchResult = result ;
370+ }
371+ }
372+
373+ // Then try catch-all for this keyword
374+ for ( const patternObj of rule . patterns ) {
375+ if ( patternObj . pattern !== '*' ) continue ;
376+
254377 const result = this . matchPattern ( processedInput , patternObj . pattern , synonyms ) ;
255378 patternTests . push ( {
256379 keyword,
257380 pattern : patternObj . pattern ,
258381 matched : result . matched ,
259- regex : result . regex ,
260382 captures : result . captures
261383 } ) ;
262384
@@ -268,7 +390,7 @@ export class PatternMatcher {
268390 }
269391 }
270392
271- // If no keyword matched, try fallback patterns
393+ // If no keyword matched, try catch-all patterns from any rule
272394 if ( ! matchedRule ) {
273395 for ( const rule of rules ) {
274396 for ( const patternObj of rule . patterns ) {
@@ -278,7 +400,6 @@ export class PatternMatcher {
278400 keyword : rule . keyword ,
279401 pattern : patternObj . pattern ,
280402 matched : result . matched ,
281- regex : result . regex ,
282403 captures : result . captures
283404 } ) ;
284405
@@ -308,7 +429,7 @@ export class PatternMatcher {
308429 description : 'Extracting parts from input' ,
309430 input : processedInput ,
310431 output : matchResult . captures . length > 0
311- ? matchResult . captures . map ( ( c , i ) => `(${ i + 1 } ): "${ c } "` ) . join ( ', ' )
432+ ? matchResult . captures . map ( ( c , i ) => `(${ i + 1 } ): "${ Array . isArray ( c ) ? c . join ( ' ' ) : c } "` ) . join ( ', ' )
312433 : 'No captures' ,
313434 details : `Extracted ${ matchResult . captures . length } component(s)`
314435 } ) ;
@@ -338,13 +459,15 @@ export class PatternMatcher {
338459 for ( let i = 0 ; i < matchResult . captures . length ; i ++ ) {
339460 const placeholder = `(${ i + 1 } )` ;
340461 if ( assembledResponse . includes ( placeholder ) ) {
462+ // Convert capture (array of words) to string
341463 const capture = matchResult . captures [ i ] ;
342- const { result : reflected , steps } = this . applyPostSubstitutions ( capture , postSubstitutions ) ;
464+ const captureText = Array . isArray ( capture ) ? capture . join ( ' ' ) : capture ;
465+ const { result : reflected , steps } = this . applyPostSubstitutions ( captureText , postSubstitutions ) ;
343466
344467 if ( steps . length > 0 ) {
345468 postSubSteps . push ( {
346469 capture : i + 1 ,
347- original : capture ,
470+ original : captureText ,
348471 reflected,
349472 substitutions : steps
350473 } ) ;
0 commit comments