Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions packages/types/src/provider-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export const providerNames = [
"fireworks",
"io-intelligence",
"roo",
"featherless",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems unrelated to the Unicode file mention fix described in the PR. Adding a new Featherless provider is a completely separate feature that should be in its own PR. Mixing unrelated changes makes it harder to review and can introduce unexpected issues.

] as const

export const providerNamesSchema = z.enum(providerNames)
Expand Down Expand Up @@ -293,6 +294,10 @@ const rooSchema = apiModelIdProviderModelSchema.extend({
// No additional fields needed - uses cloud authentication
})

const featherlessSchema = apiModelIdProviderModelSchema.extend({
featherlessApiKey: z.string().optional(),
})

const defaultSchema = z.object({
apiProvider: z.undefined(),
})
Expand Down Expand Up @@ -330,6 +335,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
fireworksSchema.merge(z.object({ apiProvider: z.literal("fireworks") })),
ioIntelligenceSchema.merge(z.object({ apiProvider: z.literal("io-intelligence") })),
rooSchema.merge(z.object({ apiProvider: z.literal("roo") })),
featherlessSchema.merge(z.object({ apiProvider: z.literal("featherless") })),
defaultSchema,
])

Expand Down Expand Up @@ -367,6 +373,7 @@ export const providerSettingsSchema = z.object({
...fireworksSchema.shape,
...ioIntelligenceSchema.shape,
...rooSchema.shape,
...featherlessSchema.shape,
...codebaseIndexProviderSchema.shape,
})

Expand Down
81 changes: 81 additions & 0 deletions packages/types/src/providers/featherless.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import type { ModelInfo } from "../model.js"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This entire file is unrelated to fixing Unicode support for file mentions. Could we separate the Featherless provider addition into a different PR to keep changes focused?


// Featherless AI models - https://api.featherless.ai/v1/models
export type FeatherlessModelId = keyof typeof featherlessModels

export const featherlessDefaultModelId: FeatherlessModelId = "meta-llama/Meta-Llama-3.1-8B-Instruct"

export const featherlessModels = {
"meta-llama/Meta-Llama-3.1-8B-Instruct": {
maxTokens: 8192,
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.1,
outputPrice: 0.1,
description: "Meta's Llama 3.1 8B Instruct model with 128K context window",
},
"meta-llama/Meta-Llama-3.1-70B-Instruct": {
maxTokens: 8192,
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.4,
outputPrice: 0.4,
description: "Meta's Llama 3.1 70B Instruct model with 128K context window",
},
"meta-llama/Meta-Llama-3.1-405B-Instruct": {
maxTokens: 8192,
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 2.0,
outputPrice: 2.0,
description: "Meta's largest Llama 3.1 405B Instruct model with 128K context window",
},
"Qwen/Qwen2.5-72B-Instruct": {
maxTokens: 8192,
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.4,
outputPrice: 0.4,
description: "Alibaba's Qwen 2.5 72B Instruct model with 128K context window",
},
"mistralai/Mistral-7B-Instruct-v0.3": {
maxTokens: 8192,
contextWindow: 32768,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.1,
outputPrice: 0.1,
description: "Mistral's 7B Instruct v0.3 model with 32K context window",
},
"mistralai/Mixtral-8x7B-Instruct-v0.1": {
maxTokens: 8192,
contextWindow: 32768,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.3,
outputPrice: 0.3,
description: "Mistral's Mixtral 8x7B MoE Instruct model with 32K context window",
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
maxTokens: 4096,
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.05,
outputPrice: 0.05,
description: "DeepSeek R1 Distill Qwen 1.5B model with 128K context window",
},
"moonshotai/Kimi-K2-Instruct": {
maxTokens: 16384,
contextWindow: 16384,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.3,
outputPrice: 0.3,
description: "Moonshot AI's Kimi K2 Instruct model with tool use support",
},
} as const satisfies Record<string, ModelInfo>
1 change: 1 addition & 0 deletions packages/types/src/providers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export * from "./cerebras.js"
export * from "./chutes.js"
export * from "./claude-code.js"
export * from "./deepseek.js"
export * from "./featherless.js"
export * from "./gemini.js"
export * from "./glama.js"
export * from "./groq.js"
Expand Down
3 changes: 3 additions & 0 deletions src/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
RequestyHandler,
HumanRelayHandler,
FakeAIHandler,
FeatherlessHandler,
XAIHandler,
GroqHandler,
HuggingFaceHandler,
Expand Down Expand Up @@ -143,6 +144,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
return new IOIntelligenceHandler(options)
case "roo":
return new RooHandler(options)
case "featherless":
return new FeatherlessHandler(options)
default:
apiProvider satisfies "gemini-cli" | undefined
return new AnthropicHandler(options)
Expand Down
19 changes: 19 additions & 0 deletions src/api/providers/featherless.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { type FeatherlessModelId, featherlessDefaultModelId, featherlessModels } from "@roo-code/types"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another file that's unrelated to the Unicode fix. Also, if this provider implementation stays (though it should be in a separate PR), where are the tests for it?


import type { ApiHandlerOptions } from "../../shared/api"

import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"

export class FeatherlessHandler extends BaseOpenAiCompatibleProvider<FeatherlessModelId> {
constructor(options: ApiHandlerOptions) {
super({
...options,
providerName: "Featherless",
baseURL: "https://api.featherless.ai/v1",
apiKey: options.featherlessApiKey,
defaultProviderModelId: featherlessDefaultModelId,
providerModels: featherlessModels,
defaultTemperature: 0.7,
})
}
}
1 change: 1 addition & 0 deletions src/api/providers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export { DeepSeekHandler } from "./deepseek"
export { DoubaoHandler } from "./doubao"
export { MoonshotHandler } from "./moonshot"
export { FakeAIHandler } from "./fake-ai"
export { FeatherlessHandler } from "./featherless"
export { GeminiHandler } from "./gemini"
export { GlamaHandler } from "./glama"
export { GroqHandler } from "./groq"
Expand Down
15 changes: 15 additions & 0 deletions src/shared/__tests__/context-mentions.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@ describe("mentionRegex and mentionRegexGlobal", () => {
{ input: "@a1b2c3d", expected: ["@a1b2c3d"] }, // Git commit hash (short)
{ input: "@a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0", expected: ["@a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"] }, // Git commit hash (long)

// Unicode file paths (Chinese, Japanese, Korean, Arabic, etc.)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great test coverage for various Unicode scripts! Consider adding edge cases like:

  • RTL text mixed with LTR paths (e.g., Hebrew/Arabic text with English paths)
  • Zero-width characters and combining marks
  • Complex emoji with skin tone modifiers or ZWJ sequences
  • Unicode normalization differences (NFC vs NFD)

{ input: "@/路径/中文文件.txt", expected: ["@/路径/中文文件.txt"] }, // Chinese characters
{ input: "@/パス/日本語ファイル.txt", expected: ["@/パス/日本語ファイル.txt"] }, // Japanese characters
{ input: "@/경로/한국어파일.txt", expected: ["@/경로/한국어파일.txt"] }, // Korean characters
{ input: "@/مسار/ملف_عربي.txt", expected: ["@/مسار/ملف_عربي.txt"] }, // Arabic characters
{ input: "@/путь/русский_файл.txt", expected: ["@/путь/русский_файл.txt"] }, // Cyrillic characters
{ input: "@/dossier/fichier_français.txt", expected: ["@/dossier/fichier_français.txt"] }, // French accents
{ input: "@/carpeta/archivo_español.txt", expected: ["@/carpeta/archivo_español.txt"] }, // Spanish characters
{ input: "@/文件夹/", expected: ["@/文件夹/"] }, // Chinese folder
{ input: "@/mixed/中文_english_日本語.txt", expected: ["@/mixed/中文_english_日本語.txt"] }, // Mixed languages
{ input: "@/emoji/file_😀_test.txt", expected: ["@/emoji/file_😀_test.txt"] }, // Emoji in filename
{ input: "@/path/文件\\ with\\ 空格.txt", expected: ["@/path/文件\\ with\\ 空格.txt"] }, // Unicode with escaped spaces

// Mentions within text
{
input: "Check file @/path/to/file\\ with\\ spaces.txt for details.",
Expand All @@ -33,6 +46,8 @@ describe("mentionRegex and mentionRegexGlobal", () => {
{ input: "See @problems and @terminal output.", expected: ["@problems", "@terminal"] },
{ input: "URL: @https://example.com.", expected: ["@https://example.com"] }, // Trailing punctuation
{ input: "Commit @a1b2c3d, then check @/file.txt", expected: ["@a1b2c3d", "@/file.txt"] },
{ input: "Check @/文档/报告.pdf for details", expected: ["@/文档/报告.pdf"] }, // Unicode in sentence
{ input: "Files: @/файл1.txt, @/файл2.txt", expected: ["@/файл1.txt", "@/файл2.txt"] }, // Multiple Unicode mentions

// Negative cases (should not match or match partially)
{ input: "@/path/with unescaped space.txt", expected: ["@/path/with"] }, // Unescaped space
Expand Down
31 changes: 16 additions & 15 deletions src/shared/context-mentions.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
/*
Mention regex:
- **Purpose**:
- To identify and highlight specific mentions in text that start with '@'.
- These mentions can be file paths, URLs, or the exact word 'problems'.
- **Purpose**:
- To identify and highlight specific mentions in text that start with '@'.
- These mentions can be file paths (including Unicode characters), URLs, or specific keywords.
- Ensures that trailing punctuation marks (like commas, periods, etc.) are not included in the match, allowing punctuation to follow the mention without being part of it.

- **Regex Breakdown**:
- `/@`:
- `/@`:
- **@**: The mention must start with the '@' symbol.

- `((?:\/|\w+:\/\/)[^\s]+?|problems\b|git-changes\b)`:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intentional? The comment in line 12 still shows the old regex pattern structure without mentioning the Unicode flag. Since we're updating the documentation to mention Unicode support, shouldn't we also update the actual pattern shown in the breakdown to reflect the 'u' flag that's now part of the pattern?

- **Capturing Group (`(...)`)**: Captures the part of the string that matches one of the specified patterns.
- `(?:\/|\w+:\/\/)`:
- `(?:\/|\w+:\/\/)`:
- **Non-Capturing Group (`(?:...)`)**: Groups the alternatives without capturing them for back-referencing.
- `\/`:
- `\/`:
- **Slash (`/`)**: Indicates that the mention is a file or folder path starting with a '/'.
- `|`: Logical OR.
- `\w+:\/\/`:
- **Protocol (`\w+://`)**: Matches URLs that start with a word character sequence followed by '://', such as 'http://', 'https://', 'ftp://', etc.
- `(?:[^\s\\]|\\ )+?`:
- **Non-Capturing Group (`(?:...)`)**: Groups the alternatives without capturing them.
- **Non-Whitespace and Non-Backslash (`[^\s\\]`)**: Matches any character that is not whitespace or a backslash.
- **Non-Whitespace and Non-Backslash (`[^\s\\]`)**: Matches any character that is not whitespace or a backslash, including Unicode characters.
- **OR (`|`)**: Logical OR.
- **Escaped Space (`\\ `)**: Matches a backslash followed by a space (an escaped space).
- **Non-Greedy (`+?`)**: Ensures the smallest possible match, preventing the inclusion of trailing punctuation.
- `|`: Logical OR.
- `problems\b`:
- `problems\b`:
- **Exact Word ('problems')**: Matches the exact word 'problems'.
- **Word Boundary (`\b`)**: Ensures that 'problems' is matched as a whole word and not as part of another word (e.g., 'problematic').
- `|`: Logical OR.
Expand All @@ -34,28 +34,29 @@ Mention regex:
- **Word Boundary (`\b`)**: Ensures that 'terminal' is matched as a whole word and not as part of another word (e.g., 'terminals').
- `(?=[.,;:!?]?(?=[\s\r\n]|$))`:
- **Positive Lookahead (`(?=...)`)**: Ensures that the match is followed by specific patterns without including them in the match.
- `[.,;:!?]?`:
- `[.,;:!?]?`:
- **Optional Punctuation (`[.,;:!?]?`)**: Matches zero or one of the specified punctuation marks.
- `(?=[\s\r\n]|$)`:
- `(?=[\s\r\n]|$)`:
- **Nested Positive Lookahead (`(?=[\s\r\n]|$)`)**: Ensures that the punctuation (if present) is followed by a whitespace character, a line break, or the end of the string.

- **Summary**:
- The regex effectively matches:
- Mentions that are file or folder paths starting with '/' and containing any non-whitespace characters (including periods within the path).
- File paths can include spaces if they are escaped with a backslash (e.g., `@/path/to/file\ with\ spaces.txt`).
- Mentions that are file or folder paths starting with '/' and containing any non-whitespace characters, including Unicode characters like Chinese, Japanese, Korean, etc.
- File paths can include spaces if they are escaped with a backslash (e.g., `@/path/to/file\ with\ spaces.txt` or `@/路径/中文文件.txt`).
- URLs that start with a protocol (like 'http://') followed by any non-whitespace characters (including query parameters).
- The exact word 'problems'.
- The exact word 'git-changes'.
- The exact word 'terminal'.
- It ensures that any trailing punctuation marks (such as ',', '.', '!', etc.) are not included in the matched mention, allowing the punctuation to follow the mention naturally in the text.
- The 'u' flag enables full Unicode support, allowing the regex to properly match Unicode characters in file paths.

- **Global Regex**:
- `mentionRegexGlobal`: Creates a global version of the `mentionRegex` to find all matches within a given string.
- `mentionRegexGlobal`: Creates a global version of the `mentionRegex` with Unicode support to find all matches within a given string.

*/
export const mentionRegex =
/(?<!\\)@((?:\/|\w+:\/\/)(?:[^\s\\]|\\ )+?|[a-f0-9]{7,40}\b|problems\b|git-changes\b|terminal\b)(?=[.,;:!?]?(?=[\s\r\n]|$))/
export const mentionRegexGlobal = new RegExp(mentionRegex.source, "g")
/(?<!\\)@((?:\/|\w+:\/\/)(?:[^\s\\]|\\ )+?|[a-f0-9]{7,40}\b|problems\b|git-changes\b|terminal\b)(?=[.,;:!?]?(?=[\s\r\n]|$))/u
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Has the performance impact of the Unicode flag been tested with large texts containing many mentions? The 'u' flag can affect regex performance, especially with complex patterns and long strings.

export const mentionRegexGlobal = new RegExp(mentionRegex.source, "gu")

// Regex to match command mentions like /command-name anywhere in text
export const commandRegexGlobal = /(?:^|\s)\/([a-zA-Z0-9_\.-]+)(?=\s|$)/g
Expand Down
Loading