36 changes: 18 additions & 18 deletions apps/web/client/public/onlook-preload-script.js

Large diffs are not rendered by default.

@@ -31,7 +31,9 @@ import {
WEB_SEARCH_TOOL_NAME,
type WEB_SEARCH_TOOL_PARAMETERS,
WRITE_FILE_TOOL_NAME,
type WRITE_FILE_TOOL_PARAMETERS
type WRITE_FILE_TOOL_PARAMETERS,
CLONE_WEBSITE_TOOL_NAME,
type CLONE_WEBSITE_TOOL_PARAMETERS,
} from '@onlook/ai';
import { Icons } from '@onlook/ui/icons';
import { cn } from '@onlook/ui/utils';
@@ -60,6 +62,7 @@ const TOOL_ICONS: Record<string, any> = {
[TYPECHECK_TOOL_NAME]: Icons.MagnifyingGlass,
[LIST_BRANCHES_TOOL_NAME]: Icons.Commit,
[GLOB_TOOL_NAME]: Icons.MagnifyingGlass,
[CLONE_WEBSITE_TOOL_NAME]: Icons.Globe,
} as const;

function truncateString(str: string, maxLength: number = 30) {
@@ -192,6 +195,18 @@ export function ToolCallSimple({
return 'Reading Onlook instructions';
case TYPECHECK_TOOL_NAME:
return 'Checking types';
case CLONE_WEBSITE_TOOL_NAME:
const params13 = toolInvocation.input as z.infer<typeof CLONE_WEBSITE_TOOL_PARAMETERS>;
if (params13?.url) {
try {
const url = new URL(params13.url);
Contributor

In the clone_website case, the URL parsing logic is duplicated. Consider refactoring it into a utility function for consistency and maintainability.
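
A minimal sketch of such a helper, under the assumption that it would live next to this component (the name getHostnameOrFallback is hypothetical, not part of this PR):

// Hypothetical shared helper for both URL-parsing call sites.
function getHostnameOrFallback(rawUrl: string | undefined, fallback: string): string {
    if (!rawUrl) return fallback;
    try {
        const { hostname } = new URL(rawUrl);
        return hostname || fallback;
    } catch {
        return fallback;
    }
}

The switch case would then reduce to: return 'Cloning ' + getHostnameOrFallback(params13?.url, 'website');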

return 'Cloning ' + (url.hostname || 'website');
} catch (error) {
return 'Cloning website';
}
} else {
return 'Cloning website';
}
Comment on lines +198 to +209

⚠️ Potential issue

Fix the switch case declaration issue.

The linter correctly identifies that params13 can be accessed by other switch cases. Wrap the declaration in a block.

 case CLONE_WEBSITE_TOOL_NAME:
+    {
     const params13 = toolInvocation.input as z.infer<typeof CLONE_WEBSITE_TOOL_PARAMETERS>;
     if (params13?.url) {
         try {
             const url = new URL(params13.url);
             return 'Cloning ' + (url.hostname || 'website');
         } catch (error) {
             return 'Cloning website';
         }
     } else {
         return 'Cloning website';
     }
+    }
🧰 Tools
🪛 Biome (2.1.2)

[error] 199-199: Other switch clauses can erroneously access this declaration.
Wrap the declaration in a block to restrict its access to the switch clause.

The declaration is defined in this switch clause:

Safe fix: Wrap the declaration in a block.

(lint/correctness/noSwitchDeclarations)

🤖 Prompt for AI Agents
In
apps/web/client/src/app/project/[id]/_components/right-panel/chat-tab/chat-messages/message-content/tool-call-simple.tsx
around lines 198 to 209, the const params13 declared inside the
CLONE_WEBSITE_TOOL_NAME case is leaking into other switch cases; wrap the case
body in its own block so the declaration is scoped locally (i.e., add an opening
{ immediately after the case label and a closing } before the case's
return/break), keep the existing try/catch/returns unchanged, and ensure no
extra fall-through occurs.

default:
return toolName?.replace(/[-_]/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
}
79 changes: 77 additions & 2 deletions apps/web/client/src/components/tools/handlers/web.ts
@@ -1,9 +1,10 @@
import { api } from '@/trpc/client';
import {
type SCRAPE_URL_TOOL_PARAMETERS,
type WEB_SEARCH_TOOL_PARAMETERS
type WEB_SEARCH_TOOL_PARAMETERS,
type CLONE_WEBSITE_TOOL_PARAMETERS
} from '@onlook/ai';
import type { WebSearchResult } from '@onlook/models';
import type { CloneWebsiteResult, WebSearchResult } from '@onlook/models';
import { type z } from 'zod';

export async function handleScrapeUrlTool(
@@ -48,3 +49,77 @@ export async function handleWebSearchTool(
};
}
}

export async function handleCloneWebsiteTool(
args: z.infer<typeof CLONE_WEBSITE_TOOL_PARAMETERS>,
editorEngine: any,
): Promise<CloneWebsiteResult> {
// Store args in function scope for error handling
const requestUrl = args.url;
const branchId = args.branchId;


try {
const result = await api.code.cloneWebsite.mutate({
url: requestUrl,
});

if (!result.result) {
throw new Error(result.error || 'Failed to clone website');
}

const { markdown, html, designScreenshot, designDocument, assets } = result.result;

// Download assets into public/cloned-assets/
const baseDir = `public/cloned-assets/`;
const sandbox = editorEngine.branches.getSandboxById(branchId);
if (!sandbox) {
Comment on lines +74 to +76

🛠️ Refactor suggestion

Make asset directory branch-scoped and quote paths

Avoid cross-branch collisions and ensure safe path handling.

-        // Download assets into public/cloned-assets/
-        const baseDir = `public/cloned-assets/`;
+        // Download assets into public/cloned-assets/<branchId>/
+        const baseDir = `public/cloned-assets/${branchId}/`;
@@
-        await sandbox.session.runCommand(`mkdir -p ${baseDir}`);
+        await sandbox.session.runCommand(`mkdir -p "${baseDir}"`);

Also applies to: 89-89

🤖 Prompt for AI Agents
In apps/web/client/src/components/tools/handlers/web.ts around lines 74-76 (and
also at line 89), the asset directory is not branch-scoped and paths are
constructed unsafely; update baseDir to be branch-scoped (e.g. include a
sanitized/validated branchId segment such as public/cloned-assets/<branchId>/)
and replace raw string concatenation with safe path construction (use path.join
or equivalent) to prevent path traversal and collisions; ensure any path
segments are properly quoted/escaped when used in shell or file APIs and
validate/sanitize branchId to allow only safe characters.
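
A sketch of what that could look like; the allowed character set is an assumption, not taken from this PR:

// Restrict the branch segment to safe characters before building the path.
const safeBranchId = branchId.replace(/[^a-zA-Z0-9_-]/g, '-');
const baseDir = `public/cloned-assets/${safeBranchId}/`;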

console.warn('Sandbox not found for branch ID:', branchId);
return {
result: {
markdown: markdown,
html: html,
designScreenshot: designScreenshot,
designDocument: designDocument,
assets: assets,
},
error: null,
};
}
await sandbox.session.runCommand(`mkdir -p ${baseDir}`);

for (const asset of assets) {
Contributor

Consider parallelizing the asset downloads (e.g. using Promise.all) to improve performance when there are many assets.
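
A minimal sketch of the parallel version, reusing the filename and runCommand logic from this handler; Promise.allSettled is chosen so a single failed download does not reject the whole batch:

await Promise.allSettled(
    assets.map(async (asset) => {
        const safeBase = asset.title.replace(/\s+/g, '-').replace(/[^a-zA-Z0-9._-]/g, '-');
        const dest = `${baseDir}${safeBase}.png`;
        const download = await sandbox.session.runCommand(
            `curl -L --silent --fail --show-error "${asset.url}" -o "${dest}"`,
        );
        if (download.success) {
            (asset as any).fileLocation = dest;
        } else {
            console.warn(`Failed to download asset ${asset.url}:`, download.error);
        }
    }),
);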


const rawBase = asset.title;
const safeBase = rawBase.replace(/\s+/g, '-').replace(/[^a-zA-Z0-9._-]/g, '-');
const filename = safeBase + '.png';
const dest = `${baseDir}${filename}`;
try {
const download = await sandbox.session.runCommand(`curl -L --silent --fail --show-error "${asset.url}" -o "${dest}"`);
Contributor

The use of string interpolation in the curl command may pose a shell injection risk. Ensure that asset.url and dest are properly sanitized or escaped.
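
A minimal sketch of one mitigation, single-quoting each value for the shell (the same approach the bot suggestion below takes inline):

// Wrap a value in single quotes, escaping any embedded single quotes.
const shellQuote = (value: string): string => `'${value.replace(/'/g, `'\\''`)}'`;

// Usage:
// sandbox.session.runCommand(`curl -L --silent --fail --show-error ${shellQuote(asset.url)} -o ${shellQuote(dest)}`);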

if (download.success) {
// Attach saved location (relative public path) for UI usage
(asset as any).fileLocation = dest;
} else {
console.log('download failed', download.error);
console.warn(`Failed to download asset ${asset.url}:`, download.error);
}
Comment on lines +91 to +105

🛠️ Refactor suggestion

⚠️ Potential issue

Critical: command injection & SSRF risk in curl; also fix forced .png extension and return a web path

  • Interpolating an untrusted URL inside double quotes allows command substitution (e.g., $(...)) → RCE.
  • No scheme validation → SSRF vector (internal addresses, file://, etc.).
  • All assets saved as .png → broken asset types.
  • fileLocation should be a public web path (strip leading public/).
-        for (const asset of assets) {
-            
-            const rawBase = asset.title;
-            const safeBase = rawBase.replace(/\s+/g, '-').replace(/[^a-zA-Z0-9._-]/g, '-');
-            const filename = safeBase + '.png';
-            const dest = `${baseDir}${filename}`;
-            try {
-                const download = await sandbox.session.runCommand(`curl -L --silent --fail --show-error "${asset.url}" -o "${dest}"`);
-                if (download.success) {
-                    // Attach saved location (relative public path) for UI usage
-                    (asset as any).fileLocation = dest;
-                } else {
-                    console.log('download failed', download.error);
-                    console.warn(`Failed to download asset ${asset.url}:`, download.error);
-                }
-            } catch (error) {
-                console.warn(`Failed to download asset ${asset.url}:`, error);
-            }
-        }
+        for (const asset of assets) {
+            // Validate and normalize URL
+            let parsed: URL;
+            try {
+                parsed = new URL(asset.url);
+            } catch {
+                console.warn('Skipping asset with invalid URL:', asset?.url);
+                continue;
+            }
+            if (!/^https?:$/.test(parsed.protocol)) {
+                console.warn('Skipping non-http(s) asset:', asset.url);
+                continue;
+            }
+
+            // Safe filename (bounded length)
+            const rawBase = (asset.title || parsed.pathname.split('/').pop() || 'asset').slice(0, 64);
+            const safeBase = rawBase.replace(/\s+/g, '-').replace(/[^a-zA-Z0-9._-]/g, '-');
+            const last = parsed.pathname.split('/').pop() || '';
+            const idx = last.lastIndexOf('.');
+            const extFromPath = idx > -1 ? last.slice(idx + 1) : '';
+            const ext = /^[a-zA-Z0-9]+$/.test(extFromPath) ? extFromPath : 'bin';
+            const filename = `${safeBase}.${ext}`;
+            const dest = `${baseDir}${filename}`;
+
+            // Shell-safe quoting: single-quote and escape existing single quotes
+            const quotedUrl = `'${asset.url.replace(/'/g, `'\\''`)}'`;
+            const quotedDest = `'${dest.replace(/'/g, `'\\''`)}'`;
+            const cmd = `curl -L --silent --fail --show-error --connect-timeout 5 --max-time 30 --proto '=http,https' --proto-redir '=http,https' ${quotedUrl} -o ${quotedDest}`;
+
+            try {
+                const download = await sandbox.session.runCommand(cmd);
+                if (download.success) {
+                    // Attach served location (strip "public/")
+                    const webPath = dest.startsWith('public/') ? `/${dest.slice('public/'.length)}` : dest;
+                    (asset as any).fileLocation = webPath;
+                } else {
+                    console.warn(`Failed to download asset ${asset.url}:`, download.error);
+                }
+            } catch (error) {
+                console.warn(`Failed to download asset ${asset.url}:`, error);
+            }
+        }

Follow-ups:

  • Consider deduping filenames (e.g., suffix collisions or hash by URL).
  • If assets can include CSS/JS/fonts, optionally restrict extensions to an allowlist and skip others.
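
A sketch of the dedupe idea, assuming Node's crypto module is available in this context:

import { createHash } from 'crypto';

// Suffix a short URL hash so assets sharing a title still get distinct filenames.
function dedupedFilename(base: string, ext: string, url: string): string {
    const hash = createHash('sha256').update(url).digest('hex').slice(0, 8);
    return `${base}-${hash}.${ext}`;
}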

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In apps/web/client/src/components/tools/handlers/web.ts around lines 91-105, the
code currently interpolates untrusted asset.url into a shell command (risking
command injection and SSRF), forces a .png extension, and stores a file system
path instead of a public web path; to fix, stop invoking curl via shell
interpolation and instead fetch assets using a safe HTTP client or spawn a
subprocess with arguments (no shell), validate and allow only safe schemes
(http/https) and disallow internal/loopback IPs/hostnames to mitigate SSRF,
determine the correct extension from the response Content-Type or the URL path
(and apply an allowlist if needed), ensure filenames are sanitized and
deduplicated (e.g., append a short hash of the URL), write to the filesystem
under the public dir, then set fileLocation to the web-accessible path by
stripping the leading public/ prefix.

} catch (error) {
console.warn(`Failed to download asset ${asset.url}:`, error);
}
}

return {
result: {
markdown: markdown,
html: html,
designScreenshot: designScreenshot,
designDocument: designDocument,
assets: assets,
},
error: null,
};
} catch (error) {
console.error('Error cloning website:', error);
throw new Error(`Failed to clone website ${requestUrl}: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
13 changes: 11 additions & 2 deletions apps/web/client/src/components/tools/tools.ts
@@ -35,7 +35,9 @@ import {
WEB_SEARCH_TOOL_NAME,
WEB_SEARCH_TOOL_PARAMETERS,
WRITE_FILE_TOOL_NAME,
WRITE_FILE_TOOL_PARAMETERS
WRITE_FILE_TOOL_PARAMETERS,
CLONE_WEBSITE_TOOL_NAME,
CLONE_WEBSITE_TOOL_PARAMETERS,
} from '@onlook/ai';
import { toast } from '@onlook/ui/sonner';
import { type z } from 'zod';
@@ -56,7 +58,8 @@ import {
handleTerminalCommandTool,
handleTypecheckTool,
handleWebSearchTool,
handleWriteFileTool
handleWriteFileTool,
handleCloneWebsiteTool,
} from './handlers';
import { EMPTY_TOOL_PARAMETERS } from './helpers';

@@ -174,6 +177,12 @@ const TOOL_HANDLERS: ClientToolMap = {
handler: async (args: z.infer<typeof CHECK_ERRORS_TOOL_PARAMETERS>, editorEngine: EditorEngine) =>
handleCheckErrors(args, editorEngine),
},
[CLONE_WEBSITE_TOOL_NAME]: {
name: CLONE_WEBSITE_TOOL_NAME,
inputSchema: CLONE_WEBSITE_TOOL_PARAMETERS,
handler: async (args: z.infer<typeof CLONE_WEBSITE_TOOL_PARAMETERS>, editorEngine: EditorEngine) =>
handleCloneWebsiteTool(args, editorEngine),
},
};

export async function handleToolCall(toolCall: ToolCall<string, unknown>, editorEngine: EditorEngine) {
137 changes: 136 additions & 1 deletion apps/web/client/src/server/api/routers/code.ts
@@ -1,10 +1,16 @@
import { env } from '@/env';
import FirecrawlApp from '@mendable/firecrawl-js';
import { applyCodeChange } from '@onlook/ai';
import type { WebSearchResult } from '@onlook/models';
import type { WebSearchResult, CloneWebsiteResult } from '@onlook/models';
import Exa from 'exa-js';
import { z } from 'zod';
import { createTRPCRouter, protectedProcedure } from '../trpc';
import { initModel } from '@onlook/ai';
import { LLMProvider, ANTHROPIC_MODELS } from '@onlook/models';
import { generateObject } from 'ai';
import { CLONE_WEBSITE_DESIGN_PROMPT } from '@onlook/ai/src/prompt/clone';
import { DesignSchema } from '@onlook/models/tools';
Comment on lines +11 to +12

💡 Verification agent

🧩 Analysis chain

Fix the import path for CLONE_WEBSITE_DESIGN_PROMPT.

The import uses a direct path instead of the package export. This could break if the internal structure changes.

-import { CLONE_WEBSITE_DESIGN_PROMPT } from '@onlook/ai/src/prompt/clone';
-import { DesignSchema } from '@onlook/models/tools';
+import { CLONE_WEBSITE_DESIGN_PROMPT } from '@onlook/ai';
+import { DesignSchema } from '@onlook/models';

🏁 Script executed:

#!/bin/bash
# Check if these exports are available from the package roots
rg -n "export.*CLONE_WEBSITE_DESIGN_PROMPT" packages/ai/src/index.ts packages/ai/index.ts
rg -n "export.*DesignSchema" packages/models/src/index.ts packages/models/index.ts

Length of output: 125


🏁 Script executed:

#!/usr/bin/env bash
set -e

echo "Root directories:"
ls -1 .

echo -e "\nApps directory (apps/*):"
ls -1 apps

echo -e "\nPackages directory (packages/*):"
ls -1 packages || echo "No packages directory at root"

echo -e "\nSearching for @onlook/ai in package.json:"
rg -n '"name"\s*:\s*"@onlook/ai"' -g "*/package.json"

echo -e "\nSearching for @onlook/models in package.json:"
rg -n '"name"\s*:\s*"@onlook/models"' -g "*/package.json"

AI_PKG_DIR=$(dirname "$(rg -l '"name"\s*:\s*"@onlook/ai"' -g "*/package.json" | head -n1)")
MODELS_PKG_DIR=$(dirname "$(rg -l '"name"\s*:\s*"@onlook/models"' -g "*/package.json" | head -n1)")

echo -e "\nAI package directory: $AI_PKG_DIR"
echo "Models package directory: $MODELS_PKG_DIR"

echo -e "\nListing index files in AI package:"
find "$AI_PKG_DIR" -type f \( -name "index.ts" -o -name "src/index.ts" \)

echo -e "\nListing index files in Models package:"
find "$MODELS_PKG_DIR" -type f \( -name "index.ts" -o -name "src/index.ts" \)

echo -e "\nSearching for export of CLONE_WEBSITE_DESIGN_PROMPT in AI index file(s):"
rg -n "export.*CLONE_WEBSITE_DESIGN_PROMPT" $(find "$AI_PKG_DIR" -type f \( -name "index.ts" -o -name "src/index.ts" \) )

echo -e "\nSearching for export of DesignSchema in Models index file(s):"
rg -n "export.*DesignSchema" $(find "$MODELS_PKG_DIR" -type f \( -name "index.ts" -o -name "src/index.ts" \) )

Length of output: 440


🏁 Script executed:

#!/usr/bin/env bash
set -e

echo "=== packages/ai/package.json ==="
cat packages/ai/package.json

echo -e "\n=== packages/models/package.json ==="
cat packages/models/package.json

echo -e "\n=== files under packages/ai/src/prompt ==="
if [ -d packages/ai/src/prompt ]; then
  find packages/ai/src/prompt -type f
else
  echo "No packages/ai/src/prompt directory"
fi

echo -e "\n=== definition of CLONE_WEBSITE_DESIGN_PROMPT in ai package ==="
rg -n "CLONE_WEBSITE_DESIGN_PROMPT" -g "packages/ai/**" || true

echo -e "\n=== packages/ai/src/index.ts (first 200 lines) ==="
if [ -f packages/ai/src/index.ts ]; then
  sed -n '1,200p' packages/ai/src/index.ts
else
  echo "packages/ai/src/index.ts not found"
fi

echo -e "\n=== packages/ai/index.ts (first 200 lines) ==="
if [ -f packages/ai/index.ts ]; then
  sed -n '1,200p' packages/ai/index.ts
else
  echo "packages/ai/index.ts not found"
fi

echo -e "\n=== files under packages/models/src/tools ==="
if [ -d packages/models/src/tools ]; then
  find packages/models/src/tools -type f
else
  echo "No packages/models/src/tools directory"
fi

echo -e "\n=== definition of DesignSchema in models package ==="
rg -n "DesignSchema" -g "packages/models/**" || true

echo -e "\n=== packages/models/src/index.ts (first 200 lines) ==="
if [ -f packages/models/src/index.ts ]; then
  sed -n '1,200p' packages/models/src/index.ts
else
  echo "packages/models/src/index.ts not found"
fi

echo -e "\n=== packages/models/index.ts (first 200 lines) ==="
if [ -f packages/models/index.ts ]; then
  sed -n '1,200p' packages/models/index.ts
else
  echo "packages/models/index.ts not found"
fi

Length of output: 4454


Use package root exports instead of deep imports

-import { CLONE_WEBSITE_DESIGN_PROMPT } from '@onlook/ai/src/prompt/clone';
-import { DesignSchema } from '@onlook/models/tools';
+import { CLONE_WEBSITE_DESIGN_PROMPT } from '@onlook/ai';
+import { DesignSchema } from '@onlook/models';

Benefits: guards against internal restructuring and aligns with package export contracts.

🤖 Prompt for AI Agents
In apps/web/client/src/server/api/routers/code.ts around lines 11-12 the file
uses deep imports ('@onlook/ai/src/prompt/clone' and '@onlook/models/tools');
change these to use the package root exports (e.g., import
CLONE_WEBSITE_DESIGN_PROMPT from '@onlook/ai' and DesignSchema from
'@onlook/models') so consumers rely on the package export surface instead of
internal paths, and update any named vs default import shape to match the
package root exports.



export const codeRouter = createTRPCRouter({
applyDiff: protectedProcedure
@@ -148,4 +154,133 @@ export const codeRouter = createTRPCRouter({
};
}
}),

cloneWebsite: protectedProcedure
.input(z.object({
url: z.string().url(),
}))
.mutation(async ({ input }): Promise<CloneWebsiteResult> => {
try {
if (!env.FIRECRAWL_API_KEY) {
throw new Error('FIRECRAWL_API_KEY is not configured');
}

const app = new FirecrawlApp({ apiKey: env.FIRECRAWL_API_KEY });

// Scrape the website with screenshot to get visual content
const result = await app.scrapeUrl(input.url, {
Contributor

The two separate scrapeUrl calls run sequentially. Consider running them concurrently with Promise.all to reduce overall latency.
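
A minimal sketch of the concurrent version, using the same options as the calls below:

const [result, imageAssetsResult] = await Promise.all([
    app.scrapeUrl(input.url, {
        formats: ['html', 'screenshot@fullPage', 'markdown'],
        onlyMainContent: false,
        waitFor: 2000,
    }),
    app.scrapeUrl(input.url, {
        formats: ['markdown'],
        onlyMainContent: false,
        includeTags: ['img'],
        waitFor: 2000,
    }),
]);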

formats: ['html', 'screenshot@fullPage', 'markdown'],
onlyMainContent: false,
waitFor: 2000,
});
const imageAssetsResult = await app.scrapeUrl(input.url, {
formats: ['markdown'],
onlyMainContent: false,
includeTags: ['img'],
waitFor: 2000,
});
Comment on lines +170 to +181

🛠️ Refactor suggestion

Add timeout configuration for Firecrawl operations.

The scraping operations use a fixed 2-second wait time which may not be sufficient for all websites. Consider making this configurable or adaptive.

 // Scrape the website with screenshot to get visual content
+const waitTime = Math.min(10000, 2000 + (input.url.length * 10)); // Adaptive wait based on URL complexity, max 10s
 const result = await app.scrapeUrl(input.url, {
     formats: ['html', 'screenshot@fullPage', 'markdown'],
     onlyMainContent: false,
-    waitFor: 2000,
+    waitFor: waitTime,
+    timeout: 30000, // Add overall timeout
 });
 const imageAssetsResult = await app.scrapeUrl(input.url, {
     formats: ['markdown'],
     onlyMainContent: false,
     includeTags: ['img'],
-    waitFor: 2000,
+    waitFor: waitTime,
+    timeout: 30000,
 });
🤖 Prompt for AI Agents
In apps/web/client/src/server/api/routers/code.ts around lines 170 to 181, the
two scrapeUrl calls hardcode waitFor: 2000 which is brittle; make the Firecrawl
timeout configurable and robust by accepting an optional timeout (e.g.,
timeoutMs) from the request input or reading a service-level config/env value,
validate and clamp it to a safe min/max, and pass that value into both scrapeUrl
calls; if no timeout provided fall back to a sensible default (e.g., 2000), and
consider supporting an adaptive strategy (e.g., retry with a larger timeout or
use a networkIdle option if available) so slow pages are handled without
indefinite waits.


if (!result.success) {
throw new Error(`Failed to clone website: ${result.error || 'Unknown error'}`);
}

let imageAssets: {
url: string;
title: string;
}[] = [];
if ('success' in imageAssetsResult && imageAssetsResult.success && imageAssetsResult.markdown) {
const md = imageAssetsResult.markdown;
const mdImgRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
let match: RegExpExecArray | null;
while ((match = mdImgRegex.exec(md)) !== null) {
const alt = (match[1] || '').trim();
const urlCandidate = match[2];
const title = alt ? alt.replace(/\s+/g, '-') : '';
if (!urlCandidate) continue;
try {
const absoluteUrl = new URL(urlCandidate, input.url).toString();
imageAssets.push({ url: absoluteUrl, title });
} catch {
imageAssets.push({ url: urlCandidate, title });
}
}
} else if (result.html) {
// Fallback: parse from HTML if markdown not available
const imgTagRegex = /<img[^>]*>/gi;
const srcRegex = /src=["']([^"']+)["']/i;
const altRegex = /alt=["']([^"']*)["']/i;
let tagMatch: RegExpExecArray | null;
while ((tagMatch = imgTagRegex.exec(result.html)) !== null) {
const tag = tagMatch[0];
const srcMatch = srcRegex.exec(tag);
if (!srcMatch) continue;
const srcCandidate = srcMatch[1] ?? '';
const altMatch = altRegex.exec(tag);
const alt = (altMatch?.[1] ?? '').trim();
const title = alt ? alt.replace(/\s+/g, '-') : '';
try {
const absoluteUrl = new URL(srcCandidate, input.url).toString();
imageAssets.push({ url: absoluteUrl, title });
} catch {
imageAssets.push({ url: srcCandidate, title });
}
}
}


// Dedupe by URL

const byUrl = new Map<string, { url: string; title: string }>();
for (const asset of imageAssets) {
if (!byUrl.has(asset.url)) {
byUrl.set(asset.url, asset);
}
}
imageAssets = Array.from(byUrl.values());

Comment on lines +187 to +240

🛠️ Refactor suggestion

Improve asset extraction robustness and add validation.

The current implementation has several areas for improvement:

  1. No validation of image URLs (could be data URIs, malformed, etc.)
  2. Missing error handling for invalid URLs
  3. No size or type filtering for assets
 let imageAssets: {
     url: string;
     title: string;
 }[] = [];
+
+// Helper to validate and normalize image URLs
+const isValidImageUrl = (url: string): boolean => {
+    if (!url || url.startsWith('data:')) return false;
+    try {
+        const parsed = new URL(url);
+        return ['http:', 'https:'].includes(parsed.protocol);
+    } catch {
+        return false;
+    }
+};
+
 if ('success' in imageAssetsResult && imageAssetsResult.success && imageAssetsResult.markdown) {
     const md = imageAssetsResult.markdown;
     const mdImgRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
     let match: RegExpExecArray | null;
     while ((match = mdImgRegex.exec(md)) !== null) {
         const alt = (match[1] || '').trim();
         const urlCandidate = match[2];
         const title = alt ? alt.replace(/\s+/g, '-') : '';
         if (!urlCandidate) continue;
         try {
             const absoluteUrl = new URL(urlCandidate, input.url).toString();
-            imageAssets.push({ url: absoluteUrl, title });
+            if (isValidImageUrl(absoluteUrl)) {
+                imageAssets.push({ url: absoluteUrl, title });
+            }
         } catch {
-            imageAssets.push({ url: urlCandidate, title });
+            // Skip invalid URLs
         }
     }
 }
🤖 Prompt for AI Agents
In apps/web/client/src/server/api/routers/code.ts around lines 187 to 240, the
image extraction block should validate and filter candidates before adding them:
only accept http(s) absolute URLs (reject data:, mailto:, javascript: etc.),
skip obviously malformed URLs by catching URL construction errors (already
present) and continue; perform a HEAD (or fetch with method: 'HEAD') to check
Content-Type starts with image/ and Content-Length (or streamed size) is below a
configured limit (e.g., 5MB) with a short timeout, and skip on any
network/timeout/error; also whitelist common image extensions as a cheap
pre-check before fetching; keep existing dedupe logic but apply it after
filtering; ensure all network calls are try/catch and do not throw to the
caller, logging or silently skipping invalid assets.
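
A sketch of the pre-download check this prompt describes, assuming a runtime with fetch and AbortSignal.timeout; the 5 MB cap and 5 s timeout are illustrative:

const MAX_ASSET_BYTES = 5 * 1024 * 1024; // assumed limit

async function looksLikeDownloadableImage(url: string): Promise<boolean> {
    try {
        const res = await fetch(url, { method: 'HEAD', signal: AbortSignal.timeout(5000) });
        if (!res.ok) return false;
        const type = res.headers.get('content-type') ?? '';
        const length = Number(res.headers.get('content-length') ?? '0');
        return type.startsWith('image/') && length <= MAX_ASSET_BYTES;
    } catch {
        return false; // network error or timeout: skip this asset
    }
}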


const { model, headers } = await initModel({
provider: LLMProvider.ANTHROPIC,
model: ANTHROPIC_MODELS.SONNET_4,
});

const { object } = await generateObject({
model,
headers,
schema: DesignSchema,
messages: [
{
role: 'system',
content: CLONE_WEBSITE_DESIGN_PROMPT,
},
{
role: 'user',
content: `HTML: ${result.html}
Markdown: ${result.markdown}
Screenshot: ${result.screenshot}`,
},
],
maxOutputTokens: 10000,
});
Comment on lines +242 to +264

🛠️ Refactor suggestion

Add error handling for AI model initialization and generation.

The design document generation lacks proper error handling and could fail silently.

+let designDocument: z.infer<typeof DesignSchema> | null = null;
+
+try {
     const { model, headers } = await initModel({
         provider: LLMProvider.ANTHROPIC,
         model: ANTHROPIC_MODELS.SONNET_4,
     });
 
     const { object } = await generateObject({
         model,
         headers,
         schema: DesignSchema,
         messages: [
             {
                 role: 'system',
                 content: CLONE_WEBSITE_DESIGN_PROMPT,
             },
             {
                 role: 'user',
                 content: `HTML: ${result.html}
                 Markdown: ${result.markdown}
                 Screenshot: ${result.screenshot}`,
             },
         ],
         maxOutputTokens: 10000,
     });
 
-    const designDocument: z.infer<typeof DesignSchema> = object;
+    designDocument = object;
+} catch (error) {
+    console.error('Failed to generate design document:', error);
+    // Continue without design document rather than failing entire operation
+}
🤖 Prompt for AI Agents
In apps/web/client/src/server/api/routers/code.ts around lines 242 to 264, the
calls to initModel and generateObject lack error handling and may fail silently;
wrap the initModel and generateObject calls in a try/catch (or separate
try/catches) so failures are caught, log the error with contextual information
(which call failed and relevant inputs), and propagate a clear failure to the
caller (e.g., throw a TRPC/HTTP error with a 500 and concise message or return
an error payload) instead of letting the function continue silently.


const designDocument: z.infer<typeof DesignSchema> = object;

return {
result: {
markdown: result.markdown || '',
html: result.html || '',
designScreenshot: result.screenshot || '',
designDocument: designDocument,
assets: imageAssets,
},
error: null,
};
Comment on lines +268 to +277

⚠️ Potential issue

Add null checks for optional Firecrawl results.

The code assumes all fields from Firecrawl will be present, but they might be undefined.

 return {
     result: {
-        markdown: result.markdown || '',
-        html: result.html || '',
-        designScreenshot: result.screenshot || '',
+        markdown: result.markdown ?? '',
+        html: result.html ?? '',
+        designScreenshot: result.screenshot ?? '',
         designDocument: designDocument,
         assets: imageAssets,
     },
     error: null,
 };
🤖 Prompt for AI Agents
In apps/web/client/src/server/api/routers/code.ts around lines 268-277, the code
assumes all fields returned from Firecrawl exist; add null/undefined checks and
safe defaults when constructing the response so missing fields won't throw.
Coalesce result.markdown and result.html to empty strings if undefined, coalesce
result.screenshot to empty string, ensure designDocument is set to null or an
empty object if missing, and ensure imageAssets is an empty array if undefined;
update any typing if necessary so the response always has those safe defaults.

} catch (error) {
console.error('Error cloning website:', error);
return {
error: error instanceof Error ? error.message : 'Unknown error',
result: null,
};
}
}),
});
11 changes: 11 additions & 0 deletions packages/ai/src/prompt/clone.ts
@@ -0,0 +1,11 @@
export const CLONE_WEBSITE_DESIGN_PROMPT = `You are an expert web designer and UX specialist.

Given the HTML, Markdown, and a screenshot of a web page, analyze the entire page from top to bottom, starting at the very top of the screenshot and continuing all the way to the bottom. Do not miss any section—your goal is to create a complete and exhaustive design document that is as accurate as possible, down to every single pixel.

Break down the page into a dynamic list of sections, ordered from top to bottom as they appear visually. For each section, provide:
- "type": the section type (e.g., "navBar", "hero", "footer", "sidebar", etc.)
- "description": a highly accurate, detailed explanation of the section's content, purpose, and visual appearance. Be specific about layout, spacing, alignment, colors, typography, and any unique style details. Ensure your description is as precise as possible and reflects the exact look and feel of the section, with pixel-level accuracy.
- "styles": a concise summary of the key CSS styles or visual properties that define this section (e.g., background color, font size, padding, margin, border, flex/grid usage, etc.). Focus on what makes the section pixel perfect.


Return your analysis as a JSON object with a "sections" array. Do not include any other text or commentary. Only return the JSON object.`;
14 changes: 13 additions & 1 deletion packages/ai/src/prompt/create.ts
@@ -1,4 +1,16 @@
export const CREATE_NEW_PAGE_SYSTEM_PROMPT = `IMPORTANT:
- The following is the first user message meant to set up the project from a blank slate.
- You will be given a prompt and optional images. You need to update a Next.js project that matches the prompt.
- Try to use a distinct style and infer it from the prompt. For example, if the prompt is for something artistic, you should make this look distinct based on the intent.`;
- Try to use a distinct style and infer it from the prompt. For example, if the prompt is for something artistic, you should make this look distinct based on the intent.
- If the user request satisfies the conditions for using the clone_website tool, call the clone_website tool.


<cloning_instructions>
- Conditions for using the clone_website tool:
- The user request is specifically to clone a website
- The user query explicitly mentions a relevant keyword such as "clone"
- The user query MUST explicitly mentions a concrete website URL. Even if the user request is to clone a website, if the user query does not explicitly mention a concrete website URL, you must ask the user to provide a concrete website URL.
- If the above conditions are met, immediately call the clone_website tool with that website_url
- IMPORTANT: The clone_website tool must be about creating a pixel perfect clone of the website that is related to the original user request.
</cloning_instructions>
`;
12 changes: 12 additions & 0 deletions packages/ai/src/tools/tools/web.ts
@@ -1,5 +1,6 @@
import { tool } from 'ai';
import { z } from 'zod';
import { BRANCH_ID_SCHEMA } from './branch';

export const SCRAPE_URL_TOOL_NAME = 'scrape_url';
export const SCRAPE_URL_TOOL_PARAMETERS = z.object({
Expand Down Expand Up @@ -44,3 +45,14 @@ export const webSearchTool = tool({
description: 'Search the web for up-to-date information',
inputSchema: WEB_SEARCH_TOOL_PARAMETERS,
});

export const CLONE_WEBSITE_TOOL_NAME = 'clone_website';
export const CLONE_WEBSITE_TOOL_PARAMETERS = z.object({
url: z.string().url().describe('The URL to clone. Must be a valid HTTP or HTTPS URL.'),
branchId: BRANCH_ID_SCHEMA,
});
export const cloneWebsiteTool = tool({
description:
'Clone a website by scraping its content and returning the HTML, a markdown version, reference screenshot of what the website looks like, reference design document, and a list of assets that you can use. Use these outputs as references to pixel perfect replicate the website’s design and layout.',
inputSchema: CLONE_WEBSITE_TOOL_PARAMETERS,
});
3 changes: 3 additions & 0 deletions packages/ai/src/tools/toolset.ts
Original file line number Diff line number Diff line change
@@ -36,6 +36,8 @@ import {
webSearchTool,
WRITE_FILE_TOOL_NAME,
writeFileTool,
CLONE_WEBSITE_TOOL_NAME,
cloneWebsiteTool,
} from './tools';

export const ASK_TOOL_SET: ToolSet = {
@@ -61,4 +63,5 @@ export const BUILD_TOOL_SET: ToolSet = {
[SANDBOX_TOOL_NAME]: sandboxTool,
[TERMINAL_COMMAND_TOOL_NAME]: terminalCommandTool,
[TYPECHECK_TOOL_NAME]: typecheckTool,
[CLONE_WEBSITE_TOOL_NAME]: cloneWebsiteTool,
};