Skip to content

Commit 054d7be

Browse files
committed
feat(alice): Use original 41K patterns, add rules viewer, fix jokes
- Add `--original` flag to the converter to exclude Mindpixel data (mp*.aiml)
- Generate alice-patterns-original.json with 41,380 authentic 2001 patterns
- Fix that-context wildcard matching for joke punchlines
- Add interactive rules browser with search/filter/pagination
- Integrate rules viewer as a 'Browse Rules' tab in the ALICE section

The original ALICE won the Loebner Prize in 2000 and 2001 with ~41K patterns. Mindpixel (52K crowdsourced patterns) was added later, in 2004-2005, and contained many low-quality responses that shadowed correct ones.
1 parent 21f70fb commit 054d7be

File tree

8 files changed

+291972
-23
lines changed

8 files changed

+291972
-23
lines changed

demos/chatbot-evolution/convert-aiml-to-json.py

Lines changed: 55 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -376,22 +376,63 @@ def save_json(self, patterns: List[Dict[str, Any]], output_file: str):
376376

377377

378378
def main():
    """Convert the ALICE AIML files into a single JSON pattern file.

    Command-line flags (looked up in ``sys.argv``):
        --original, --no-mindpixel: skip every ``*.aiml`` file whose name
            starts with ``mp`` and write ``data/alice-patterns-original.json``.
            Without either flag all ``*.aiml`` files are processed and the
            result is written to ``data/alice-patterns-full.json``.

    Raises:
        SystemExit: if there are no AIML files to process, so an existing
            output file is not overwritten with an empty pattern set.
    """
    import sys

    base_dir = Path(__file__).parent
    aiml_dir = base_dir / "alice-aiml-original"

    exclude_mindpixel = "--original" in sys.argv or "--no-mindpixel" in sys.argv

    if exclude_mindpixel:
        output_file = base_dir / "data" / "alice-patterns-original.json"
        print("Mode: Original ALICE (excluding Mindpixel mp*.aiml files)")
        print("This matches the ~41,000 pattern 2001 Loebner Prize winning version.\n")
    else:
        output_file = base_dir / "data" / "alice-patterns-full.json"
        print("Mode: Full ALICE (including all files)")
        print("Use --original to exclude Mindpixel data for authentic 2001 version.\n")

    converter = AIMLConverter(str(aiml_dir))

    # Sorted so the processing order (and thus the output) is deterministic.
    aiml_files = sorted(aiml_dir.glob("*.aiml"))

    if exclude_mindpixel:
        kept_files = [f for f in aiml_files if not f.name.startswith("mp")]
        # Report the number actually filtered out rather than a fixed figure.
        excluded_count = len(aiml_files) - len(kept_files)
        aiml_files = kept_files
        print(f"Excluding {excluded_count} Mindpixel files (mp*.aiml)")

    if not aiml_files:
        # Exits with status 1 and prints the message to stderr.
        sys.exit(f"No AIML files found in {aiml_dir}")

    print(f"Found {len(aiml_files)} AIML files to process\n")

    all_patterns = []
    for filepath in aiml_files:
        print(f"Processing {filepath.name}...")
        all_patterns.extend(converter.parse_aiml_file(filepath))
        converter.file_count += 1

    # NOTE(review): presumably parse_aiml_file() keeps converter.pattern_count
    # up to date while parsing -- confirm against AIMLConverter.
    print(f"\nExtracted: {converter.pattern_count} patterns")

    all_patterns = converter.deduplicate_patterns(all_patterns)
    converter.pattern_count = len(all_patterns)
    print(f"After deduplication: {converter.pattern_count} patterns")

    metadata = {
        "source": "ALICE AIML Foundation v1.0",
        "version": "Original 2001" if exclude_mindpixel else "Full with Mindpixel",
        "files_processed": converter.file_count,
        "total_patterns": converter.pattern_count,
        "includes_mindpixel": not exclude_mindpixel,
        "license": "GNU General Public License",
        "copyright": "(c) 2011 ALICE A.I. Foundation",
    }

    # Make sure the data/ directory exists before writing into it.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(
            {"metadata": metadata, "patterns": all_patterns},
            f,
            indent=2,
            ensure_ascii=False,
        )

    print(f"\nSaved {converter.pattern_count} patterns to {output_file}")
395436

396437

397438
if __name__ == "__main__":

0 commit comments

Comments
 (0)