Adds support for Hugging Chat models

eamodio · eamodio · commit 51d03bd6c305 · 2024-10-23T15:34:46.000-04:00
diff --git a/src/ai/aiProviderService.ts b/src/ai/aiProviderService.ts
@@ -18,6 +18,7 @@ import { supportedInVSCodeVersion } from '../system/vscode/utils';
 import type { TelemetryService } from '../telemetry/telemetry';
 import { AnthropicProvider } from './anthropicProvider';
 import { GeminiProvider } from './geminiProvider';
+import { HuggingChatProvider } from './huggingchatProvider';
 import { OpenAIProvider } from './openaiProvider';
 import { isVSCodeAIModel, VSCodeAIProvider } from './vscodeProvider';
 import { xAIProvider } from './xaiProvider';
@@ -47,6 +48,7 @@ const _supportedProviderTypes = new Map<AIProviders, AIProviderConstructor>([
 	['openai', OpenAIProvider],
 	['anthropic', AnthropicProvider],
 	['gemini', GeminiProvider],
+	['huggingchat', HuggingChatProvider],
 	['xai', xAIProvider],
 ]);
 
diff --git a/src/ai/huggingchatProvider.ts b/src/ai/huggingchatProvider.ts
@@ -0,0 +1,363 @@
+import { fetch } from '@env/fetch';
+import type { CancellationToken } from 'vscode';
+import { window } from 'vscode';
+import type { HuggingChatModels } from '../constants.ai';
+import type { TelemetryEvents } from '../constants.telemetry';
+import type { Container } from '../container';
+import { CancellationError } from '../errors';
+import { sum } from '../system/iterable';
+import { interpolate } from '../system/string';
+import { configuration } from '../system/vscode/configuration';
+import type { Storage } from '../system/vscode/storage';
+import type { AIModel, AIProvider } from './aiProviderService';
+import { getApiKey as getApiKeyCore, getMaxCharacters } from './aiProviderService';
+import {
+	generateCloudPatchMessageSystemPrompt,
+	generateCloudPatchMessageUserPrompt,
+	generateCodeSuggestMessageSystemPrompt,
+	generateCodeSuggestMessageUserPrompt,
+	generateCommitMessageSystemPrompt,
+	generateCommitMessageUserPrompt,
+} from './prompts';
+
+const provider = { id: 'huggingchat', name: 'Hugging Chat' } as const; // identity shared by the provider class and every model below
+type HuggingChatModel = AIModel<typeof provider.id>;
+// Static catalog returned by `getModels()`; each `id` is sent as the request `model` and interpolated into the endpoint URL
+const models: HuggingChatModel[] = [
+	{
+		id: 'google/gemma-1.1-2b-it',
+		name: 'Google Gemma 1.1 2B',
+		maxTokens: 4096,
+		provider: provider,
+	},
+	{
+		id: 'HuggingFaceH4/starchat2-15b-v0.1',
+		name: 'HuggingFace Starchat 2.1',
+		maxTokens: 4096,
+		provider: provider,
+	},
+	{
+		id: 'meta-llama/Llama-3.2-3B-Instruct',
+		name: 'Meta Llama 3.2 3B', // label matches the id above, which is the model actually requested
+		maxTokens: 4096,
+		provider: provider,
+	},
+	{
+		id: 'microsoft/Phi-3-mini-4k-instruct',
+		name: 'Microsoft Phi 3 Mini',
+		maxTokens: 4096,
+		provider: provider,
+	},
+	{
+		id: 'mistralai/Mistral-Nemo-Instruct-2407',
+		name: 'Mistral Nemo Instruct',
+		maxTokens: 4096,
+		provider: provider,
+	},
+];
+
+/** AI provider backed by the chat-completions endpoint of the Hugging Face Inference API. */
+export class HuggingChatProvider implements AIProvider<typeof provider.id> {
+	readonly id = provider.id;
+	readonly name = provider.name;
+
+	constructor(private readonly container: Container) {}
+
+	/** No-op: this provider keeps nothing that needs releasing. */
+	dispose() {}
+
+	/** Resolves to the module-level `models` list. */
+	getModels(): Promise<readonly AIModel<typeof provider.id>[]> {
+		return Promise.resolve(models);
+	}
+
+	/**
+	 * Sends the system prompt and the interpolated user prompt (diff, context, custom instructions) to `model`. The diff
+	 * is cut to a character budget that shrinks on `context_length_exceeded` (at most two retries); `reporting` is updated in place.
+	 * @returns The trimmed reply, or `undefined` when no API key is available or the reply has no content
+	 * @throws Error when the API responds with a failure that is not retried
+	 */
+	async generateMessage(
+		model: HuggingChatModel,
+		diff: string,
+		reporting: TelemetryEvents['ai/generate'],
+		promptConfig: {
+			type: 'commit' | 'cloud-patch' | 'code-suggestion';
+			systemPrompt: string;
+			userPrompt: string;
+			customInstructions?: string;
+		},
+		options?: { cancellation?: CancellationToken; context?: string },
+	): Promise<string | undefined> {
+		const apiKey = await getApiKey(this.container.storage);
+		if (apiKey == null) return undefined;
+
+		let retries = 0;
+		let maxCodeCharacters = getMaxCharacters(model, 2600);
+		while (true) {
+			const request: HuggingChatChatCompletionRequest = {
+				model: model.id,
+				messages: [
+					{
+						role: 'system',
+						content: promptConfig.systemPrompt,
+					},
+					{
+						role: 'user',
+						content: interpolate(promptConfig.userPrompt, {
+							diff: diff.substring(0, maxCodeCharacters),
+							context: options?.context ?? '',
+							instructions: promptConfig.customInstructions ?? '',
+						}),
+					},
+				],
+			};
+
+			reporting['retry.count'] = retries;
+			reporting['input.length'] = (reporting['input.length'] ?? 0) + sum(request.messages, m => m.content.length);
+
+			const rsp = await this.fetch(apiKey, request, options?.cancellation);
+			if (!rsp.ok) {
+				await this.throwUnlessRetryable(rsp, `generate ${promptConfig.type} message`, model, ++retries);
+				maxCodeCharacters -= 500 * retries;
+				continue;
+			}
+
+			return this.readReply(rsp, diff, maxCodeCharacters);
+		}
+	}
+
+	/** Generates a draft message with the code-suggestion prompts when `options.codeSuggestion` is truthy, else the cloud-patch prompts. */
+	async generateDraftMessage(
+		model: HuggingChatModel,
+		diff: string,
+		reporting: TelemetryEvents['ai/generate'],
+		options?: {
+			cancellation?: CancellationToken;
+			context?: string;
+			codeSuggestion?: boolean | undefined;
+		},
+	): Promise<string | undefined> {
+		// Strip the flag so only the cancellation/context options are forwarded
+		const { codeSuggestion, ...messageOptions } = options ?? {};
+
+		return this.generateMessage(
+			model,
+			diff,
+			reporting,
+			codeSuggestion
+				? {
+						type: 'code-suggestion',
+						systemPrompt: generateCodeSuggestMessageSystemPrompt,
+						userPrompt: generateCodeSuggestMessageUserPrompt,
+						customInstructions: configuration.get('experimental.generateCodeSuggestionMessagePrompt'),
+				  }
+				: {
+						type: 'cloud-patch',
+						systemPrompt: generateCloudPatchMessageSystemPrompt,
+						userPrompt: generateCloudPatchMessageUserPrompt,
+						customInstructions: configuration.get('experimental.generateCloudPatchMessagePrompt'),
+				  },
+			messageOptions,
+		);
+	}
+
+	/** Generates a commit message for the diff using the commit prompts and the configured custom instructions. */
+	async generateCommitMessage(
+		model: HuggingChatModel,
+		diff: string,
+		reporting: TelemetryEvents['ai/generate'],
+		options?: { cancellation?: CancellationToken; context?: string },
+	): Promise<string | undefined> {
+		return this.generateMessage(
+			model,
+			diff,
+			reporting,
+			{
+				type: 'commit',
+				systemPrompt: generateCommitMessageSystemPrompt,
+				userPrompt: generateCommitMessageUserPrompt,
+				customInstructions: configuration.get('experimental.generateCommitMessagePrompt'),
+			},
+			options,
+		);
+	}
+
+	/**
+	 * Asks `model` to explain `diff`, passing the author's `message` as extra context. Failures are handled as in
+	 * `generateMessage`: shrink the diff and retry on context overflow (at most twice), otherwise throw.
+	 */
+	async explainChanges(
+		model: HuggingChatModel,
+		message: string,
+		diff: string,
+		reporting: TelemetryEvents['ai/explain'],
+		options?: { cancellation?: CancellationToken },
+	): Promise<string | undefined> {
+		const apiKey = await getApiKey(this.container.storage);
+		if (apiKey == null) return undefined;
+
+		let retries = 0;
+		let maxCodeCharacters = getMaxCharacters(model, 3000);
+		while (true) {
+			const code = diff.substring(0, maxCodeCharacters);
+
+			const request: HuggingChatChatCompletionRequest = {
+				model: model.id,
+				messages: [
+					{
+						role: 'user',
+						content: `You are an advanced AI programming assistant tasked with summarizing code changes into an explanation that is both easy to understand and meaningful. Construct an explanation that:
+- Concisely synthesizes meaningful information from the provided code diff
+- Incorporates any additional context provided by the user to understand the rationale behind the code changes
+- Places the emphasis on the 'why' of the change, clarifying its benefits or addressing the problem that necessitated the change, beyond just detailing the 'what' has changed
+
+Do not make any assumptions or invent details that are not supported by the code diff or the user-provided context.
+
+Here is additional context provided by the author of the changes, which should provide some explanation to why these changes where made. Please strongly consider this information when generating your explanation:\n\n${message}
+
+Now, kindly explain the following code diff in a way that would be clear to someone reviewing or trying to understand these changes:\n\n${code}
+
+Remember to frame your explanation in a way that is suitable for a reviewer to quickly grasp the essence of the changes, the issues they resolve, and their implications on the codebase.`,
+					},
+				],
+			};
+
+			reporting['retry.count'] = retries;
+			reporting['input.length'] = (reporting['input.length'] ?? 0) + sum(request.messages, m => m.content.length);
+
+			const rsp = await this.fetch(apiKey, request, options?.cancellation);
+			if (!rsp.ok) {
+				await this.throwUnlessRetryable(rsp, 'explain changes', model, ++retries);
+				maxCodeCharacters -= 500 * retries;
+				continue;
+			}
+
+			return this.readReply(rsp, diff, maxCodeCharacters);
+		}
+	}
+
+	/**
+	 * Returns normally only when the failed response reports `context_length_exceeded` and `attempt` (the 1-based
+	 * retry that would follow) is at most 2; otherwise throws an `Error` whose message starts "Unable to <action>".
+	 */
+	private async throwUnlessRetryable(
+		rsp: Awaited<ReturnType<typeof fetch>>,
+		action: string,
+		model: HuggingChatModel,
+		attempt: number,
+	): Promise<void> {
+		if (rsp.status === 404) {
+			throw new Error(
+				`Unable to ${action}: Your API key doesn't seem to have access to the selected '${model.id}' model`,
+			);
+		}
+		if (rsp.status === 429) {
+			throw new Error(
+				`Unable to ${action}: (${this.name}:${rsp.status}) Too many requests (rate limit exceeded) or your API key is associated with an expired trial`,
+			);
+		}
+
+		let json;
+		try {
+			json = (await rsp.json()) as { error?: { code: string; message: string } } | undefined;
+		} catch {}
+
+		if (attempt <= 2 && json?.error?.code === 'context_length_exceeded') return;
+
+		throw new Error(
+			`Unable to ${action}: (${this.name}:${rsp.status}) ${json?.error?.message || rsp.statusText}`,
+		);
+	}
+
+	/** Warns the user when the diff had to be truncated, then resolves to the first choice's trimmed content (if any). */
+	private async readReply(
+		rsp: Awaited<ReturnType<typeof fetch>>,
+		diff: string,
+		maxCodeCharacters: number,
+	): Promise<string | undefined> {
+		if (diff.length > maxCodeCharacters) {
+			void window.showWarningMessage(
+				`The diff of the changes had to be truncated to ${maxCodeCharacters} characters to fit within the Hugging Chat's limits.`,
+			);
+		}
+
+		const data = (await rsp.json()) as HuggingChatChatCompletionResponse | undefined;
+		// Optional chaining: a 2xx payload without choices yields `undefined` instead of a TypeError
+		return data?.choices?.[0]?.message?.content?.trim();
+	}
+
+	/** POSTs `request` to the model's chat-completions endpoint; an abort triggered by `cancellation` surfaces as `CancellationError`. */
+	private async fetch(
+		apiKey: string,
+		request: HuggingChatChatCompletionRequest,
+		cancellation: CancellationToken | undefined,
+	) {
+		const aborter = cancellation != null ? new AbortController() : undefined;
+		const subscription = cancellation?.onCancellationRequested(() => aborter?.abort());
+
+		try {
+			return await fetch(`https://api-inference.huggingface.co/models/${request.model}/v1/chat/completions`, {
+				headers: {
+					Accept: 'application/json',
+					Authorization: `Bearer ${apiKey}`,
+					'Content-Type': 'application/json',
+				},
+				method: 'POST',
+				body: JSON.stringify(request),
+				signal: aborter?.signal,
+			});
+		} catch (ex) {
+			if (ex.name === 'AbortError') throw new CancellationError(ex);
+
+			throw ex;
+		} finally {
+			// Unsubscribe once the request settles so the listener does not linger on the token
+			subscription?.dispose();
+		}
+	}
+}
+
+/** Delegates to the shared key helper; the validator is a loose, unanchored sanity check (optional `hf_` prefix, 32+ alphanumerics). */
+async function getApiKey(storage: Storage): Promise<string | undefined> {
+	return getApiKeyCore(storage, {
+		...provider,
+		validator: v => /(?:hf_)?[a-zA-Z0-9]{32,}/.test(v),
+		url: 'https://huggingface.co/settings/tokens',
+	});
+}
+
+interface HuggingChatChatCompletionRequest { // JSON body that `HuggingChatProvider.fetch` serializes and POSTs
+	model: HuggingChatModels; // also interpolated into the endpoint URL by `fetch`
+	messages: { role: 'system' | 'user' | 'assistant'; content: string }[]; // `content` lengths are summed into `reporting['input.length']`
+	temperature?: number; // this and the optional fields below are not set by any request built in this file
+	top_p?: number;
+	n?: number;
+	stream?: boolean;
+	stop?: string | string[];
+	max_tokens?: number;
+	presence_penalty?: number;
+	frequency_penalty?: number;
+	logit_bias?: Record<string, number>;
+	user?: string;
+}
+
+interface HuggingChatChatCompletionResponse { // shape the provider assumes for the body of a successful (`rsp.ok`) response
+	id: string;
+	object: 'chat.completion';
+	created: number;
+	model: string;
+	choices: { // only `choices[0].message.content` is read in this file
+		index: number;
+		message: {
+			role: 'system' | 'user' | 'assistant';
+			content: string; // trimmed and returned to the caller as the generated text
+		};
+		finish_reason: string;
+	}[];
+	usage: { // not read anywhere in this file
+		prompt_tokens: number;
+		completion_tokens: number;
+		total_tokens: number;
+	};
+}
diff --git a/src/constants.ai.ts b/src/constants.ai.ts