Post-training alignment often reduces LLM diversity, leading to a phenomenon known as <em>mode collapse</em>.
Unlike prior work that attributes this effect to algorithmic limitations, we identify a fundamental, pervasive data-level driver: <em>typicality bias</em> in preference data.
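As a toy illustration of why such a bias can collapse modes (an assumption-laden sketch, not the paper's formal derivation): a KL-regularized alignment objective yields a policy proportional to pRef(y)·exp(r(y)/β); if the reward partly rewards typicality, r(y) = α·log pRef(y), the optimum is pRef(y)^(1 + α/β), which for α > 0 sharpens the base distribution toward its mode.

```typescript
// Toy model (assumption, not the paper's exact formulation): a typicality-biased
// reward r(y) = alpha * log(pRef(y)) under KL-regularized tuning gives an
// aligned policy p(y) ∝ pRef(y)^(1 + alpha/beta) — a sharpened distribution.
function sharpen(pRef: number[], alpha: number, beta: number): number[] {
  const powered = pRef.map((p) => Math.pow(p, 1 + alpha / beta));
  const z = powered.reduce((a, b) => a + b, 0);
  return powered.map((p) => p / z);
}

const pRef = [0.5, 0.3, 0.2];          // base (pretrained) distribution
const aligned = sharpen(pRef, 1, 0.5); // exponent 3: mass shifts to the mode
// aligned[0] ≈ 0.78 > pRef[0] = 0.5 — the most typical output dominates
```

The exponent 1 + α/β exceeds 1 whenever the typicality term is present, so the effect persists regardless of the alignment algorithm — matching the claim that the driver is data-level.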
@@ -649,9 +647,49 @@ export default function HomePage() {
        </div>
      </div>
    </div>
+  </section>
+
+  {/* Verbalized Sampling: Title & Description left, install/code right */}

@@ -745,24 +783,24 @@ export default function HomePage() {
<strong>Figure 4:</strong> Qualitative and quantitative examples of Verbalized Sampling on creative writing, dialogue simulation, and enumerative open-ended QA.
Our comprehensive experiments on multiple tasks demonstrate that Verbalized Sampling significantly improves the diversity-quality trade-off across tasks and model families, without compromising factual accuracy or safety.
</p>
<p>
As shown in Figure 4, for <strong>story writing</strong>, VS improves output diversity. For <strong>dialogue simulation</strong>, VS matches the human donation-amount distribution much more closely and generates more realistic persuasion behaviors. On the <strong>enumerative open-ended QA</strong> task, we ask the model to "generate US states". We first query a pretraining corpus (RedPajama) to establish a "reference" distribution of US state names in the pretraining data. The verbalized probability distribution generated by VS, averaged over 10 trials, closely aligns with this reference pretraining distribution (KL = 0.12). In contrast, direct prompting collapses onto a few modes, repeatedly outputting states like California and Texas.
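The KL comparison described above can be sketched numerically. This is a minimal illustration only: the state lists and counts below are made up for the example, not the RedPajama or model data.

```typescript
// Hypothetical sketch: compare a verbalized distribution against a reference
// pretraining distribution via KL divergence. All numbers are illustrative.
type Dist = Record<string, number>;

function normalize(d: Dist): Dist {
  const total = Object.values(d).reduce((a, b) => a + b, 0);
  return Object.fromEntries(Object.entries(d).map(([k, v]) => [k, v / total]));
}

// KL(P || Q) = sum_x P(x) * ln(P(x) / Q(x)); assumes Q(x) > 0 wherever P(x) > 0,
// with a small floor to avoid division by zero for unseen items.
function klDivergence(p: Dist, q: Dist): number {
  let kl = 0;
  for (const [k, pk] of Object.entries(p)) {
    if (pk > 0) kl += pk * Math.log(pk / (q[k] ?? 1e-12));
  }
  return kl;
}

// Made-up corpus counts and model outputs, normalized to distributions.
const reference = normalize({ California: 30, Texas: 25, Ohio: 20, Maine: 15 });
const verbalized = normalize({ California: 28, Texas: 26, Ohio: 22, Maine: 14 });
const collapsed = normalize({ California: 70, Texas: 28, Ohio: 1, Maine: 1 });

// The distribution that tracks the reference scores a lower KL than the
// mode-collapsed one, mirroring the comparison in the text.
const klVS = klDivergence(verbalized, reference);
const klDirect = klDivergence(collapsed, reference);
```

The asymmetry of KL(P || Q) matters here: P is the model's distribution and Q the reference, so a model that piles mass on a few reference-rare items is penalized heavily.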
app/(dashboard)/terminal_prompt.tsx (2 additions & 2 deletions)
@@ -9,8 +9,8 @@ export function Terminal_Prompt() {
   const terminalSteps = [
     { line: 'You are a helpful assistant. For each user query, generate a set of five responses. Each response should be approximately 200 words.', showPrompt: true },
     { line: 'Return the responses each within a separate <response> tag.', showPrompt: false },
-    { line: 'Each <response> tag include a <text> and a numeric <probability>.', showPrompt: false },
-    { line: 'Please sample at random from the full distribution.', showPrompt: false },
+    { line: 'Each <response> tag must include a <text> and a numeric <probability>.', showPrompt: false },
+    { line: 'Randomly sample the responses from the full distribution.', showPrompt: false },
     { line: '<user_query>Write a short story about a bear.</user_query>', showPrompt: true },
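For illustration, the responses this prompt elicits can be parsed and sampled as follows. This is a hypothetical sketch: the `<response>`/`<text>`/`<probability>` tag names come from the prompt above, but the parsing and sampling code is an assumption, not part of the repo.

```typescript
// Hypothetical client-side sketch: extract the <response> blocks the prompt
// requests, then draw one according to its verbalized probability.
interface Candidate { text: string; probability: number; }

function parseResponses(raw: string): Candidate[] {
  const out: Candidate[] = [];
  const re = /<response>\s*<text>([\s\S]*?)<\/text>\s*<probability>([\d.]+)<\/probability>\s*<\/response>/g;
  for (const m of raw.matchAll(re)) {
    out.push({ text: m[1].trim(), probability: Number(m[2]) });
  }
  return out;
}

// Weighted sampling; probabilities need not sum to exactly 1, so we
// renormalize by the total. `rand` is injectable for testing.
function sampleOne(cands: Candidate[], rand: () => number = Math.random): Candidate {
  const total = cands.reduce((s, c) => s + c.probability, 0);
  let r = rand() * total;
  for (const c of cands) {
    r -= c.probability;
    if (r <= 0) return c;
  }
  return cands[cands.length - 1]; // numeric-edge fallback
}
```

Sampling from the verbalized distribution, rather than always taking the highest-probability candidate, is what recovers the diversity the prompt asks the model to verbalize.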