# Source: Autumn_AI/test_g2p_tts_integration.py (shhreyuuFW/Autumn_AI, branch "main")

"""
Test G2P-Enhanced TTS Pipeline Integration
Tests Misaki G2P integration with Kokoro TTS for improved pronunciation.
"""

import asyncio
import logging
import sys
from pathlib import Path
import tempfile
import os

# Put the directory that contains this script at the front of the import
# search path so the project-local imports used by the tests resolve when the
# file is run directly.
project_root = Path(__file__).parent
sys.path[:0] = [str(project_root)]

# Root logger: INFO and above, formatted as "time - logger - level - message".
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

async def test_g2p_tts_integration():
    """Exercise the Kokoro TTS engine end to end with G2P preprocessing.

    Steps, each reported on stdout:

    1. Print the G2P-related TTS settings.
    2. Create and initialize ``KokoroTTSEngine``; stop early if
       initialization reports failure.
    3. Run a fixed list of sentences through ``clean_text_for_tts`` and,
       while the engine reports ``is_initialized``, through
       ``generate_speech`` (writing into a throwaway temp directory).
    4. If a G2P engine is attached, register a custom pronunciation and
       print the G2P statistics.
    5. Shut the engine down.

    This is a best-effort smoke script: any ``Exception`` is printed together
    with its traceback and swallowed, so the coroutine always returns
    ``None``.  Once the engine has been initialized successfully it is shut
    down on every exit path, including failures part-way through.
    """
    print("Testing G2P-Enhanced TTS Pipeline")
    print("=" * 50)

    tts_engine = None
    # True from successful initialize() until shutdown() has been attempted;
    # tells the ``finally`` clause whether cleanup is still owed.
    needs_shutdown = False

    try:
        from core.kokoro_tts import KokoroTTSEngine
        from config.settings import Settings

        # Initialize settings
        settings = Settings()
        print(f"G2P enabled in settings: {settings.tts.use_g2p}")
        print(f"G2P engine: {settings.tts.g2p_engine}")
        print(f"Espeak fallback: {settings.tts.use_espeak_fallback}")

        # Create TTS engine
        print("\nInitializing Kokoro TTS with G2P...")
        tts_engine = KokoroTTSEngine()

        # Initialize the TTS engine (this will also initialize G2P)
        if not await tts_engine.initialize():
            print("❌ Failed to initialize TTS engine")
            return
        needs_shutdown = True

        print("✅ TTS engine initialized successfully")

        # Check if G2P engine was loaded
        if tts_engine.g2p_engine and tts_engine.g2p_engine.is_initialized:
            print("✅ G2P engine integrated successfully")

            # Show G2P stats
            stats = tts_engine.g2p_engine.get_stats()
            print(f"G2P Status: {stats}")
        else:
            print("⚠️ G2P engine not available (continuing with traditional TTS)")

        # Test text preprocessing
        test_texts = [
            "Hello, I'm Autumn, your AI assistant.",
            "Kokoro TTS with Misaki G2P provides better pronunciation.",
            "Dr. Smith's appointment is at 3:30 PM.",
            "The API returns JSON data with HTTP status codes.",
            "Technical terms like AI, TTS, and G2P are handled correctly."
        ]

        print("\nTesting text preprocessing:")
        print("-" * 40)

        for i, text in enumerate(test_texts, 1):
            print(f"\nTest {i}: '{text}'")

            # Test the text cleaning/preprocessing
            processed_text = await tts_engine.clean_text_for_tts(text)
            print(f"Processed: '{processed_text}'")

            # Test speech generation (if TTS is fully available)
            if tts_engine.is_initialized:
                print("Generating speech...")
                # The directory (and any audio written into it) is removed as
                # soon as the ``with`` block exits, so the file is inspected
                # inside the block.
                with tempfile.TemporaryDirectory() as temp_dir:
                    output_file = Path(temp_dir) / f"test_speech_{i}.wav"

                    # Generate speech
                    speech_file = await tts_engine.generate_speech(
                        text=text,
                        emotion="neutral",
                        output_file=str(output_file)
                    )

                    if speech_file and os.path.exists(speech_file):
                        file_size = os.path.getsize(speech_file)
                        print(f"✅ Speech generated: {Path(speech_file).name} ({file_size} bytes)")
                    else:
                        print("⚠️ Speech generation failed (may be normal without full TTS setup)")

            print()

        # Test custom pronunciations if G2P is available
        if tts_engine.g2p_engine:
            print("\nTesting custom pronunciations:")
            print("-" * 35)

            # Add a custom pronunciation
            tts_engine.g2p_engine.add_custom_pronunciation("Misaki", "misɑki")

            custom_test = "Misaki G2P engine provides accurate phonemes"
            processed = await tts_engine.clean_text_for_tts(custom_test)
            print(f"Input:     '{custom_test}'")
            print(f"Processed: '{processed}'")

        # Performance summary
        print("\nPerformance Summary:")
        print("-" * 25)
        if tts_engine.g2p_engine:
            g2p_stats = tts_engine.g2p_engine.get_stats()
            print(f"G2P conversions: {g2p_stats.get('total_conversions', 0)}")
            print(f"Average G2P time: {g2p_stats.get('average_conversion_time', 0):.3f}s")
            print(f"Cache hit rate: {g2p_stats.get('cache_hit_rate', 0):.1%}")

        print(f"TTS initialized: {tts_engine.is_initialized}")
        print(f"Using ONNX: {getattr(tts_engine, 'use_onnx', False)}")
        print(f"G2P available: {tts_engine.g2p_engine is not None}")

        # Clean shutdown.  Clear the flag first so a failing shutdown() is
        # reported by the handler below and not attempted a second time.
        needs_shutdown = False
        await tts_engine.shutdown()
        print("\n✅ G2P-Enhanced TTS pipeline test completed successfully")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Reached with the flag still set only when something failed (or the
        # task was cancelled) after initialization: release the engine anyway.
        if needs_shutdown:
            try:
                await tts_engine.shutdown()
            except Exception as shutdown_error:
                print(f"⚠️ TTS engine shutdown failed: {shutdown_error}")

async def test_pronunciation_comparison():
    """Print a side-by-side of basic text cleaning and G2P conversion.

    A G2P engine is created through ``create_g2p_engine``; if none is
    returned the comparison is skipped.  For each fixed sample sentence the
    function prints the original text, a "traditional" cleaned version
    (non-ASCII characters removed, ``Dr.`` expanded to ``Doctor``, the
    standalone word ``vs`` expanded to ``versus``) and the first element of
    the result of ``convert_text``.

    This is a best-effort smoke script: any ``Exception`` is printed together
    with its traceback and swallowed, so the coroutine always returns
    ``None``.  Once an engine has been obtained it is shut down on every exit
    path, including failures part-way through.
    """
    import re
    import traceback

    print("\n" + "=" * 50)
    print("Pronunciation Comparison Test")
    print("=" * 50)

    # Compiled once, outside the per-sentence loop.
    non_ascii = re.compile(r'[^\x00-\x7F]+')
    # Word boundaries keep the expansion from firing inside longer words.
    versus_abbrev = re.compile(r'\bvs\b')

    g2p_engine = None
    # True from engine creation until shutdown() has been attempted; tells
    # the ``finally`` clause whether cleanup is still owed.
    needs_shutdown = False

    try:
        from core.g2p_engine import create_g2p_engine

        # Create G2P engine
        g2p_engine = await create_g2p_engine(
            use_transformer=False,
            british=False,
            use_espeak_fallback=True
        )

        if not g2p_engine:
            print("❌ G2P engine not available for comparison")
            return
        needs_shutdown = True

        test_cases = [
            "The API returns JSON data",
            "Dr. Smith's research on AI",
            "HTTP vs HTTPS protocols",
            "SQL database queries",
            "Machine learning algorithms"
        ]

        print("\nTraditional vs G2P-Enhanced Processing:")
        print("-" * 45)

        for text in test_cases:
            print(f"\nOriginal: '{text}'")

            # Traditional cleaning (basic)
            traditional = non_ascii.sub('', text)
            traditional = traditional.replace('Dr.', 'Doctor')
            traditional = versus_abbrev.sub('versus', traditional)
            print(f"Traditional: '{traditional}'")

            # G2P enhanced; only the first element of the result is shown.
            phonemes, _ = await g2p_engine.convert_text(text)
            print(f"G2P Enhanced: '{phonemes}'")

        # Clear the flag first so a failing shutdown() is reported by the
        # handler below and not attempted a second time.
        needs_shutdown = False
        await g2p_engine.shutdown()
        print("\n✅ Pronunciation comparison completed")

    except Exception as e:
        print(f"❌ Comparison test failed: {e}")
        traceback.print_exc()

    finally:
        # Reached with the flag still set only when something failed (or the
        # task was cancelled) after the engine was created: release it anyway.
        if needs_shutdown:
            try:
                await g2p_engine.shutdown()
            except Exception as shutdown_error:
                print(f"⚠️ G2P engine shutdown failed: {shutdown_error}")

if __name__ == "__main__":
    async def main():
        """Run both smoke tests one after the other on a single event loop."""
        # Integration test first, then the pronunciation comparison.
        for scenario in (test_g2p_tts_integration, test_pronunciation_comparison):
            await scenario()

    asyncio.run(main())