@@ -41,7 +41,7 @@ <h1 class="hero-title">Chatbot Evolution Timeline</h1>
       <span class="era-label">2020<br>BlenderBot</span>
     </div>
     <div class="era era-2020s" data-era="2020s">
-      <span class="era-label">2025<br>DeepSeek-R1</span>
+      <span class="era-label">2024<br>Qwen2.5</span>
     </div>
   </div>
 </section>
@@ -455,20 +455,20 @@ <h2>2020s: GPT & Transformers</h2>
 
       <div class="chatbot-info">
         <div class="info-card">
-          <h3>About DeepSeek-R1</h3>
-          <p><strong>Innovation:</strong> Distilled reasoning from DeepSeek-R1</p>
-          <p><strong>Method:</strong> Decoder-only transformer, chain-of-thought</p>
-          <p><strong>Model:</strong> DeepSeek-R1-Distill-Qwen-1.5B</p>
-          <p><strong>Benchmarks:</strong> AIME 28.9%, MATH-500 83.9%</p>
+          <h3>About Modern LLMs</h3>
+          <p><strong>Innovation:</strong> Instruction-tuned transformers</p>
+          <p><strong>Method:</strong> Decoder-only transformer with chat templates</p>
+          <p><strong>Models:</strong> Qwen2.5 0.5B, SmolLM 360M</p>
+          <p><strong>Context:</strong> Multi-turn conversation support</p>
         </div>
 
         <div class="info-card">
           <h3>How It Works</h3>
           <ul>
-            <li>Distilled from larger reasoning model</li>
-            <li>Chain-of-thought reasoning capabilities</li>
-            <li>Strong math and coding performance</li>
-            <li>Fallback: Gemma 3 1B/270M if needed</li>
+            <li>Pre-trained on vast text corpora</li>
+            <li>Fine-tuned for instruction following</li>
+            <li>System prompts guide behavior</li>
+            <li>Runs entirely in your browser (WASM/WebGPU)</li>
           </ul>
         </div>
       </div>
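
The "How It Works" card now promises fully in-browser inference over WASM/WebGPU. The page's script is outside this diff, but here is a minimal sketch of how such lazy loading typically looks with transformers.js (the library behind the onnx-community checkpoints linked further down); the `loadModel` helper and its progress wiring are illustrative assumptions, not code from this PR:

```ts
import { pipeline, TextGenerationPipeline } from '@huggingface/transformers';

// Illustrative helper (assumed, not from this PR): lazily load a chat model
// in the browser, preferring WebGPU and falling back to WASM.
async function loadModel(modelId: string): Promise<TextGenerationPipeline> {
  const device = 'gpu' in navigator ? 'webgpu' : 'wasm';
  return await pipeline('text-generation', modelId, {
    device,
    dtype: 'q4', // 4-bit weights, matching the quantization card below
    progress_callback: (p: any) => {
      // Would feed the #gpt-progress element shown in the markup below.
      if (p.status === 'progress') console.log(`${p.file}: ${Math.round(p.progress)}%`);
    },
  });
}

// Usage: const generator = await loadModel('onnx-community/Qwen2.5-0.5B-Instruct');
```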
@@ -484,23 +484,30 @@ <h3>How It Works</h3>
       <div class="chat-interface">
         <div class="model-loading-status hidden" id="gpt-loading-status">
           <div class="loading-spinner"></div>
-          <div class="loading-text">Loading DeepSeek-R1...</div>
+          <div class="loading-text">Loading model...</div>
           <div class="loading-progress" id="gpt-progress">Initializing...</div>
         </div>
         <div class="chat-messages" id="gpt-messages"></div>
         <div class="chat-input-area">
-          <input type="text" class="chat-input" id="gpt-input" placeholder="Talk to DeepSeek-R1...">
+          <div class="model-selector-row">
+            <select id="gpt-model-selector" class="model-selector">
+              <option value="0">Qwen 2.5 0.5B (Alibaba)</option>
+              <option value="1">SmolLM 360M (HuggingFace)</option>
+            </select>
+            <button class="btn-clear-chat" id="gpt-clear-btn" title="Clear chat history">Clear</button>
+          </div>
+          <input type="text" class="chat-input" id="gpt-input" placeholder="Talk to the model...">
           <button class="chat-send" id="gpt-send-btn" onclick="sendMessage('gpt')">Send</button>
         </div>
-        <p class="demo-note">Using DeepSeek-R1-Distill 1.5B. Fallback: Gemma 3 1B/270M. Loads on first message.</p>
+        <p class="demo-note">Select a model above. Conversation history is preserved. First message loads the model (~30s).</p>
       </div>
     </div>
 
     <!-- Architecture Tab -->
     <div class="chatbot-tab-content" id="gpt-architecture-tab">
       <div class="architecture-content">
         <div class="architecture-diagram">
-          <h4>Decoder-Only Transformer (DeepSeek-R1)</h4>
+          <h4>Decoder-Only Transformer (Qwen2.5 / SmolLM)</h4>
           <div class="arch-flow">
             <div class="arch-block input-block">
               <div class="block-label">Input Prompt</div>
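
The chat interface added above wires the Send button to `sendMessage('gpt')` and promises preserved conversation history. A hedged sketch of what that handler could look like, reusing the hypothetical `loadModel` from the earlier sketch; `MODEL_IDS` and `history` are likewise illustrative names, not code shipped in this PR:

```ts
// Maps the <select> option values ("0", "1") to the checkpoints in this PR.
const MODEL_IDS = [
  'onnx-community/Qwen2.5-0.5B-Instruct',
  'onnx-community/SmolLM-360M-Instruct',
];

// Multi-turn history, seeded with a system prompt ("System prompts guide behavior").
const history: { role: string; content: string }[] = [
  { role: 'system', content: 'You are a concise, helpful assistant.' },
];

async function sendMessage(prefix: string): Promise<void> {
  const input = document.getElementById(`${prefix}-input`) as HTMLInputElement;
  const selector = document.getElementById(`${prefix}-model-selector`) as HTMLSelectElement;
  history.push({ role: 'user', content: input.value });
  input.value = '';

  // First call downloads the model, matching the "~30s" demo note.
  const generator = await loadModel(MODEL_IDS[Number(selector.value)]);
  const output: any = await generator(history, { max_new_tokens: 256 });

  // transformers.js returns the whole chat; the last message is the new reply.
  history.push(output[0].generated_text.at(-1));
}
```

The Clear button would then just truncate `history` back to the system message (`history.length = 1`).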
@@ -513,12 +520,12 @@ <h4>Decoder-Only Transformer (DeepSeek-R1)</h4>
             </div>
             <div class="arch-arrow">↓</div>
             <div class="arch-block decoder-only-block">
-              <div class="block-label">DeepSeek-R1 Decoder Stack</div>
+              <div class="block-label">Transformer Decoder Stack</div>
               <div class="block-content">
                 <div class="sub-block">Grouped-Query Attention</div>
                 <div class="sub-block">SiLU FFN</div>
                 <div class="sub-block">RMSNorm</div>
-                <div class="block-note">x28 layers</div>
+                <div class="block-note">x24 layers (Qwen) / x32 layers (SmolLM)</div>
               </div>
             </div>
             <div class="arch-arrow">↓</div>
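
The decoder stack above leads with Grouped-Query Attention, and a little KV-cache arithmetic shows why that is the right call for browser inference. The numbers used here (24 layers, 2 KV heads out of 14 query heads, head_dim 64) are Qwen2.5-0.5B's published config, assumed rather than stated anywhere in this diff:

```ts
// KV cache size per token = 2 (K and V) * layers * kv_heads * head_dim * bytes/value.
function kvBytesPerToken(layers: number, kvHeads: number, headDim: number, bytes = 2): number {
  return 2 * layers * kvHeads * headDim * bytes;
}

const gqa = kvBytesPerToken(24, 2, 64);  // GQA, 2 KV heads: 12,288 B ≈ 12 KB/token
const mha = kvBytesPerToken(24, 14, 64); // full MHA, 14 KV heads: 86,016 B ≈ 84 KB/token

// At the 32,768-token context in the spec table: 384 MiB vs 2,688 MiB of fp16 cache.
console.log(gqa * 32768 / 2 ** 20, mha * 32768 / 2 ** 20);
```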
@@ -533,44 +540,40 @@ <h4>Decoder-Only Transformer (DeepSeek-R1)</h4>
           <h4>Key Concepts</h4>
           <div class="concept-grid">
             <div class="concept-card">
-              <h5>Knowledge Distillation</h5>
-              <p>DeepSeek-R1-Distill captures reasoning abilities from a much larger model, enabling strong performance at small size.</p>
+              <h5>Instruction Tuning</h5>
+              <p>Models are fine-tuned on instruction-response pairs, learning to follow user requests and generate helpful outputs.</p>
             </div>
             <div class="concept-card">
-              <h5>Chain-of-Thought</h5>
-              <p>The model learned to break complex problems into steps, improving accuracy on math and coding tasks.</p>
+              <h5>Chat Templates</h5>
+              <p>System prompts and message formatting guide model behavior, enabling multi-turn conversations with context.</p>
             </div>
             <div class="concept-card">
               <h5>Grouped-Query Attention</h5>
-              <p>GQA reduces memory usage by sharing key-value heads across query heads, enabling efficient inference.</p>
+              <p>GQA reduces memory usage by sharing key-value heads across query heads, enabling efficient browser inference.</p>
             </div>
             <div class="concept-card">
-              <h5>RoPE Positions</h5>
-              <p>Rotary Position Embeddings encode position through rotation, enabling long context handling.</p>
+              <h5>Quantization (q4)</h5>
+              <p>4-bit quantization shrinks model size by ~4x while preserving quality, essential for browser deployment.</p>
             </div>
           </div>
         </div>
 
         <div class="model-specs">
-          <h4>DeepSeek-R1-Distill-Qwen-1.5B Specifications</h4>
+          <h4>Available Models</h4>
           <table class="specs-table">
-            <tr><td>Parameters</td><td>1.5 Billion</td></tr>
-            <tr><td>Architecture</td><td>Decoder-Only Transformer</td></tr>
-            <tr><td>Layers</td><td>28</td></tr>
-            <tr><td>Hidden Size</td><td>1536</td></tr>
-            <tr><td>Attention Heads</td><td>12</td></tr>
-            <tr><td>Context Length</td><td>131,072 tokens</td></tr>
-            <tr><td>AIME 2024</td><td>28.9%</td></tr>
-            <tr><td>MATH-500</td><td>83.9%</td></tr>
-            <tr><td>LiveCodeBench</td><td>16.9%</td></tr>
-            <tr><td>Year</td><td>2025 (DeepSeek)</td></tr>
+            <tr><th>Spec</th><th>Qwen 2.5 0.5B</th><th>SmolLM 360M</th></tr>
+            <tr><td>Parameters</td><td>500 Million</td><td>360 Million</td></tr>
+            <tr><td>Architecture</td><td>Decoder-Only</td><td>Decoder-Only</td></tr>
+            <tr><td>Layers</td><td>24</td><td>32</td></tr>
+            <tr><td>Hidden Size</td><td>896</td><td>960</td></tr>
+            <tr><td>Context Length</td><td>32,768 tokens</td><td>2,048 tokens</td></tr>
+            <tr><td>Organization</td><td>Alibaba</td><td>HuggingFace</td></tr>
+            <tr><td>Year</td><td>2024</td><td>2024</td></tr>
           </table>
           <div class="model-note">
-            <strong>Fallback Chain:</strong>
-            <a href="https://huggingface.co/onnx-community/DeepSeek-R1-Distill-Qwen-1.5B-ONNX" target="_blank">DeepSeek-R1 1.5B</a> →
-            <a href="https://huggingface.co/onnx-community/gemma-3-1b-it-ONNX" target="_blank">Gemma 3 1B</a> →
-            <a href="https://huggingface.co/onnx-community/gemma-3-270m-it-ONNX" target="_blank">Gemma 3 270M</a>.
-            Uses largest successful model.
+            <strong>Model Links:</strong>
+            <a href="https://huggingface.co/onnx-community/Qwen2.5-0.5B-Instruct" target="_blank">Qwen2.5-0.5B-Instruct</a> |
+            <a href="https://huggingface.co/onnx-community/SmolLM-360M-Instruct" target="_blank">SmolLM-360M-Instruct</a>
           </div>
         </div>
       </div>
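
Of the new concept cards, "Chat Templates" is the most abstract; concretely, the tokenizer turns the message array into the model's prompt format. A sketch using transformers.js's `apply_chat_template` against the Qwen checkpoint linked above (the ChatML-style markers in the comment reflect Qwen2.5's published template and should be read as indicative):

```ts
import { AutoTokenizer } from '@huggingface/transformers';

const tokenizer = await AutoTokenizer.from_pretrained('onnx-community/Qwen2.5-0.5B-Instruct');

const messages = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'Summarize grouped-query attention.' },
];

// Render the conversation into the model's expected prompt string.
const prompt = tokenizer.apply_chat_template(messages, {
  tokenize: false,
  add_generation_prompt: true, // append the assistant header so the model answers next
});

// Indicative output for Qwen2.5 (ChatML-style):
// <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
// <|im_start|>user\nSummarize grouped-query attention.<|im_end|>\n
// <|im_start|>assistant\n
console.log(prompt);
```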
@@ -639,8 +642,8 @@ <h3>Evolution Stats</h3>
           <span class="stat-value">90M params</span>
         </div>
         <div class="stat-item">
-          <span class="stat-label">DeepSeek-R1 (2025)</span>
-          <span class="stat-value">1.5B params</span>
+          <span class="stat-label">Qwen 2.5 (2024)</span>
+          <span class="stat-value">500M params</span>
         </div>
         <div class="stat-item">
           <span class="stat-label">Claude 4 Opus (2025)</span>