Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/lib/components/chat/ChatWindow.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import { onDestroy } from "svelte";

import IconOmni from "$lib/components/icons/IconOmni.svelte";
import IconCheap from "$lib/components/icons/IconCheap.svelte";
import IconFast from "$lib/components/icons/IconFast.svelte";
import CarbonCaretDown from "~icons/carbon/caret-down";
import { PROVIDERS_HUB_ORGS } from "@huggingface/inference";
import CarbonDirectionRight from "~icons/carbon/direction-right-01";
import IconArrowUp from "~icons/lucide/arrow-up";
import IconMic from "~icons/lucide/mic";
Expand Down Expand Up @@ -297,6 +300,12 @@
(currentModel as unknown as { supportsTools?: boolean }).supportsTools) === true
);

// Get provider override for the current model (HuggingChat only)
let providerOverride = $derived($settings.providerOverrides?.[currentModel.id]);
let hasProviderOverride = $derived(
providerOverride && providerOverride !== "auto" && !currentModel.isRouter
);

// Always allow common text-like files; add images only when model is multimodal
import { TEXT_MIME_ALLOWLIST, IMAGE_MIME_ALLOWLIST_DEFAULT } from "$lib/constants/mime";

Expand Down Expand Up @@ -710,6 +719,31 @@
{currentModel.displayName}
{:else}
Model: {currentModel.displayName}
{#if hasProviderOverride}
{@const hubOrg =
PROVIDERS_HUB_ORGS[providerOverride as keyof typeof PROVIDERS_HUB_ORGS]}
<span
class="inline-flex shrink-0 items-center rounded p-0.5 {providerOverride ===
'fastest'
? 'bg-green-100 text-green-600 dark:bg-green-800/20 dark:text-green-500'
: providerOverride === 'cheapest'
? 'bg-blue-100 text-blue-600 dark:bg-blue-800/20 dark:text-blue-500'
: ''}"
title="Provider: {providerOverride}"
>
{#if providerOverride === "fastest"}
<IconFast classNames="text-sm" />
{:else if providerOverride === "cheapest"}
<IconCheap classNames="text-sm" />
{:else if hubOrg}
<img
src="https://huggingface.co/api/avatars/{hubOrg}"
alt={providerOverride}
class="size-3 flex-none rounded-sm"
/>
{/if}
</span>
{/if}
{/if}
<CarbonCaretDown class="-ml-0.5 text-xxs" />
</a>
Expand Down
20 changes: 20 additions & 0 deletions src/lib/components/icons/IconCheap.svelte
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!-- IconCheap: small inline SVG glyph rendered when a model's inference
     provider preference is set to "cheapest" (see ChatWindow.svelte and
     the settings layout, which branch on providerOverride === "cheapest").
     Sized in em units so it scales with the surrounding font size. -->
<script lang="ts">
// Component props (Svelte 5 runes style).
interface Props {
// Optional extra CSS classes forwarded to the root <svg> element.
classNames?: string;
}
let { classNames = "" }: Props = $props();
</script>

<svg
class={classNames}
width="1em"
height="1em"
viewBox="0 0 12 12"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<!-- Single-path glyph; color follows the parent's CSS `color` via currentColor. -->
<path
d="M6 7.778a.856.856 0 0 1-.628-.261.858.858 0 0 1-.26-.628c-.001-.245.086-.454.26-.628A.861.861 0 0 1 6 6c.244 0 .453.087.628.261a.852.852 0 0 1 .26.628.867.867 0 0 1-.26.628.844.844 0 0 1-.628.26Zm-2.056-4h4.112l.566-1.134a.423.423 0 0 0-.017-.433A.42.42 0 0 0 8.222 2H3.778c-.17 0-.298.07-.383.211a.424.424 0 0 0-.017.433l.566 1.134ZM4.4 10h3.2c.667 0 1.233-.231 1.7-.694.467-.463.7-1.032.7-1.706 0-.281-.048-.556-.144-.822a2.404 2.404 0 0 0-.412-.722L8.29 4.666H3.71l-1.155 1.39a2.404 2.404 0 0 0-.412.722C2.048 7.044 2 7.318 2 7.6c0 .674.232 1.243.695 1.706.463.463 1.031.694 1.705.694Z"
fill="currentColor"
/>
</svg>
20 changes: 20 additions & 0 deletions src/lib/components/icons/IconFast.svelte
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!-- IconFast: small inline SVG glyph rendered when a model's inference
     provider preference is set to "fastest" (see ChatWindow.svelte and
     the settings layout, which branch on providerOverride === "fastest").
     Sized in em units so it scales with the surrounding font size. -->
<script lang="ts">
// Component props (Svelte 5 runes style).
interface Props {
// Optional extra CSS classes forwarded to the root <svg> element.
classNames?: string;
}
let { classNames = "" }: Props = $props();
</script>

<svg
class={classNames}
width="1em"
height="1em"
viewBox="0 0 12 12"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<!-- Single-path glyph; color follows the parent's CSS `color` via currentColor. -->
<path
d="M6 2a4 4 0 1 0 0 8 4 4 0 0 0 0-8Zm0 .8A3.2 3.2 0 0 1 9.2 6c0 .96-.4 1.8-1.08 2.4-.56-.52-1.32-.8-2.12-.8s-1.52.28-2.12.8A3.15 3.15 0 0 1 2.8 6 3.2 3.2 0 0 1 6 2.8Zm-.8.8a.4.4 0 1 0 0 .8.4.4 0 0 0 0-.8Zm1.6 0a.4.4 0 1 0 0 .8.4.4 0 0 0 0-.8Zm1.236 1.176c-.052 0-.1.012-.156.024l-1.28.528-.108.044a.807.807 0 0 0-1.053.059.796.796 0 0 0-.008 1.13.796.796 0 0 0 .869.179.81.81 0 0 0 .5-.628l.092-.04 1.288-.52.008-.004a.4.4 0 0 0-.152-.772ZM4 4.8a.4.4 0 1 0 0 .8.4.4 0 0 0 0-.8Z"
fill="currentColor"
/>
</svg>
2 changes: 2 additions & 0 deletions src/lib/server/api/routes/groups/user.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ export const userGroup = new Elysia()
customPrompts: settings?.customPrompts ?? {},
multimodalOverrides: settings?.multimodalOverrides ?? {},
toolsOverrides: settings?.toolsOverrides ?? {},
providerOverrides: settings?.providerOverrides ?? {},
billingOrganization: settings?.billingOrganization ?? undefined,
};
})
Expand All @@ -90,6 +91,7 @@ export const userGroup = new Elysia()
customPrompts: z.record(z.string()).default({}),
multimodalOverrides: z.record(z.boolean()).default({}),
toolsOverrides: z.record(z.boolean()).default({}),
providerOverrides: z.record(z.string()).default({}),
disableStream: z.boolean().default(false),
directPaste: z.boolean().default(false),
hidePromptExamples: z.record(z.boolean()).default({}),
Expand Down
2 changes: 2 additions & 0 deletions src/lib/server/endpoints/endpoints.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ export interface EndpointParameters {
conversationId?: ObjectId;
locals: App.Locals | undefined;
abortSignal?: AbortSignal;
/** Inference provider preference: "auto", "fastest", "cheapest", or a specific provider name */
provider?: string;
}

export type TextGenerationStreamOutputSimplified = TextGenerationStreamOutput & {
Expand Down
15 changes: 13 additions & 2 deletions src/lib/server/endpoints/openai/endpointOai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,21 @@ export async function endpointOai(
conversationId,
locals,
abortSignal,
provider,
}) => {
const prompt = await buildPrompt({
messages,
preprompt,
model,
});

// Build model ID with optional provider suffix (e.g., "model:fastest" or "model:together")
const baseModelId = model.id ?? model.name;
const modelId = provider && provider !== "auto" ? `${baseModelId}:${provider}` : baseModelId;

const parameters = { ...model.parameters, ...generateSettings };
const body: CompletionCreateParamsStreaming = {
model: model.id ?? model.name,
model: modelId,
prompt,
stream: true,
max_tokens: parameters?.max_tokens,
Expand Down Expand Up @@ -165,6 +170,7 @@ export async function endpointOai(
isMultimodal,
locals,
abortSignal,
provider,
}) => {
// Format messages for the chat API, handling multimodal content if supported
let messagesOpenAI: OpenAI.Chat.Completions.ChatCompletionMessageParam[] =
Expand Down Expand Up @@ -195,8 +201,13 @@ export async function endpointOai(

// Combine model defaults with request-specific parameters
const parameters = { ...model.parameters, ...generateSettings };

// Build model ID with optional provider suffix (e.g., "model:fastest" or "model:together")
const baseModelId = model.id ?? model.name;
const modelId = provider && provider !== "auto" ? `${baseModelId}:${provider}` : baseModelId;

const body = {
model: model.id ?? model.name,
model: modelId,
messages: messagesOpenAI,
stream: streamingSupported,
// Support two different ways of specifying token limits depending on the model
Expand Down
2 changes: 2 additions & 0 deletions src/lib/server/textGeneration/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export async function* generate(
assistant,
promptedAt,
forceMultimodal,
provider,
locals,
abortController,
}: GenerateContext,
Expand Down Expand Up @@ -60,6 +61,7 @@ export async function* generate(
conversationId: conv._id,
locals,
abortSignal: abortController.signal,
provider,
});

for await (const output of stream) {
Expand Down
1 change: 1 addition & 0 deletions src/lib/server/textGeneration/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ async function* textGenerationWithoutTitle(
assistant: ctx.assistant,
forceMultimodal: ctx.forceMultimodal,
forceTools: ctx.forceTools,
provider: ctx.provider,
locals: ctx.locals,
preprompt,
abortSignal: ctx.abortController.signal,
Expand Down
10 changes: 8 additions & 2 deletions src/lib/server/textGeneration/mcp/runMcpFlow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import { AbortedGenerations } from "$lib/server/abortedGenerations";

export type RunMcpFlowContext = Pick<
TextGenerationContext,
"model" | "conv" | "assistant" | "forceMultimodal" | "forceTools" | "locals"
"model" | "conv" | "assistant" | "forceMultimodal" | "forceTools" | "provider" | "locals"
> & { messages: EndpointMessage[] };

// Return type: "completed" = MCP ran successfully, "not_applicable" = MCP didn't run, "aborted" = user aborted
Expand All @@ -44,6 +44,7 @@ export async function* runMcpFlow({
assistant,
forceMultimodal,
forceTools,
provider,
locals,
preprompt,
abortSignal,
Expand Down Expand Up @@ -392,8 +393,13 @@ export async function* runMcpFlow({
? (parameters.stop as string[])
: undefined;

// Build model ID with optional provider suffix (e.g., "model:fastest" or "model:together")
const baseModelId = targetModel.id ?? targetModel.name;
const modelIdWithProvider =
provider && provider !== "auto" ? `${baseModelId}:${provider}` : baseModelId;

const completionBase: Omit<ChatCompletionCreateParamsStreaming, "messages"> = {
model: targetModel.id ?? targetModel.name,
model: modelIdWithProvider,
stream: true,
temperature: typeof parameters?.temperature === "number" ? parameters.temperature : undefined,
top_p: typeof parameters?.top_p === "number" ? parameters.top_p : undefined,
Expand Down
2 changes: 2 additions & 0 deletions src/lib/server/textGeneration/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export interface TextGenerationContext {
forceMultimodal?: boolean;
/** Force-enable tool calling even if model does not advertise support */
forceTools?: boolean;
/** Inference provider preference: "auto", "fastest", "cheapest", or a specific provider name */
provider?: string;
locals: App.Locals | undefined;
abortController: AbortController;
}
3 changes: 2 additions & 1 deletion src/lib/stores/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ type SettingsStore = {
customPrompts: Record<string, string>;
multimodalOverrides: Record<string, boolean>;
toolsOverrides: Record<string, boolean>;
hidePromptExamples: Record<string, boolean>;
providerOverrides: Record<string, string>;
recentlySaved: boolean;
disableStream: boolean;
directPaste: boolean;
hidePromptExamples: Record<string, boolean>;
billingOrganization?: string;
};

Expand Down
8 changes: 8 additions & 0 deletions src/lib/types/Settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ export interface Settings extends Timestamps {
*/
hidePromptExamples?: Record<string, boolean>;

/**
* Per-model inference provider preference.
* Values: "auto" (default), "fastest", "cheapest", or a specific provider name (e.g., "together", "sambanova").
* The value is appended to the model ID when making inference requests (e.g., "model:fastest").
*/
providerOverrides?: Record<string, string>;

disableStream: boolean;
directPaste: boolean;

Expand All @@ -52,6 +59,7 @@ export const DEFAULT_SETTINGS = {
multimodalOverrides: {},
toolsOverrides: {},
hidePromptExamples: {},
providerOverrides: {},
disableStream: false,
directPaste: false,
} satisfies SettingsEditable;
4 changes: 4 additions & 0 deletions src/routes/+layout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ export const load = async ({ depends, fetch, url }) => {
welcomeModalSeenAt: settings.welcomeModalSeenAt
? new Date(settings.welcomeModalSeenAt)
: null,
// Ensure providerOverrides has a default value (may not exist in older DB records)
providerOverrides:
(settings as unknown as { providerOverrides?: Record<string, string> }).providerOverrides ??
{},
},
publicConfig: getConfigManager(publicConfig),
...featureFlags,
Expand Down
6 changes: 6 additions & 0 deletions src/routes/conversation/[id]/+server.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { authCondition } from "$lib/server/auth";
import { collections } from "$lib/server/database";
import { config } from "$lib/server/config";
import { models, validModelIdSchema } from "$lib/server/models";
import { ERROR_MESSAGES } from "$lib/stores/errors";
import type { Message } from "$lib/types/Message";
Expand Down Expand Up @@ -556,6 +557,11 @@ export async function POST({ request, locals, params, getClientAddress }) {
forceMultimodal: Boolean(userSettings?.multimodalOverrides?.[model.id]),
// Force-enable tools if user settings say so for this model
forceTools: Boolean(userSettings?.toolsOverrides?.[model.id]),
// Inference provider preference (HuggingChat only, skip for router models)
provider:
config.isHuggingChat && !model.isRouter
? userSettings?.providerOverrides?.[model.id]
: undefined,
locals,
abortController: ctrl,
};
Expand Down
42 changes: 42 additions & 0 deletions src/routes/settings/(nav)/+layout.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@
import { useSettingsStore } from "$lib/stores/settings";
import IconOmni from "$lib/components/icons/IconOmni.svelte";
import IconBurger from "$lib/components/icons/IconBurger.svelte";
import IconFast from "$lib/components/icons/IconFast.svelte";
import IconCheap from "$lib/components/icons/IconCheap.svelte";
import CarbonClose from "~icons/carbon/close";
import CarbonTextLongParagraph from "~icons/carbon/text-long-paragraph";
import CarbonChevronLeft from "~icons/carbon/chevron-left";
import LucideImage from "~icons/lucide/image";
import LucideHammer from "~icons/lucide/hammer";
import IconGear from "~icons/bi/gear-fill";
import { PROVIDERS_HUB_ORGS } from "@huggingface/inference";
import { usePublicConfig } from "$lib/utils/PublicConfig.svelte";

const publicConfig = usePublicConfig();

import type { LayoutData } from "../$types";
import { browser } from "$app/environment";
Expand Down Expand Up @@ -200,6 +206,42 @@
</span>
{/if}

{#if publicConfig.isHuggingChat && !model.isRouter && $settings.providerOverrides?.[model.id] && $settings.providerOverrides[model.id] !== "auto"}
{@const providerOverride = $settings.providerOverrides[model.id]}
{@const hubOrg =
PROVIDERS_HUB_ORGS[providerOverride as keyof typeof PROVIDERS_HUB_ORGS]}
{#if providerOverride === "fastest"}
<span
title="Provider: {providerOverride}"
class="grid size-[21px] flex-none place-items-center rounded-md bg-green-500/10 text-green-600 dark:text-green-500"
aria-label="Provider: {providerOverride}"
role="img"
>
<IconFast classNames="size-3" />
</span>
{:else if providerOverride === "cheapest"}
<span
title="Provider: {providerOverride}"
class="grid size-[21px] flex-none place-items-center rounded-md bg-blue-500/10 text-blue-600 dark:text-blue-500"
aria-label="Provider: {providerOverride}"
role="img"
>
<IconCheap classNames="size-3" />
</span>
{:else if hubOrg}
<span
title="Provider: {providerOverride}"
class="flex size-[21px] flex-none items-center justify-center rounded-md bg-gray-500/10 p-[0.225rem]"
>
<img
src="https://huggingface.co/api/avatars/{hubOrg}"
alt={providerOverride}
class="size-full rounded"
/>
</span>
{/if}
{/if}

{#if $settings.customPrompts?.[model.id]}
<CarbonTextLongParagraph
class="size-6 rounded-md border border-gray-300 p-1 text-gray-800 dark:border-gray-600 dark:text-gray-200"
Expand Down
1 change: 1 addition & 0 deletions src/routes/settings/(nav)/+server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export async function POST({ request, locals }) {
customPrompts: z.record(z.string()).default({}),
multimodalOverrides: z.record(z.boolean()).default({}),
toolsOverrides: z.record(z.boolean()).default({}),
providerOverrides: z.record(z.string()).default({}),
disableStream: z.boolean().default(false),
directPaste: z.boolean().default(false),
hidePromptExamples: z.record(z.boolean()).default({}),
Expand Down
Loading
Loading