//
// PROJECT-SPECIFIC CHAT HISTORY:
// To maintain separate chat histories for different Sublime Text projects,
// you can define a 'cache_prefix' within your .sublime-project file.
// Add the following structure to your project settings (e.g., Project > Edit Project):
//
// {
// "settings": {
// "ai_assistant": {
// "cache_prefix": "/path/to/your/project/.openai_cache/" // Or any unique path
// }
// }
// }
//
// The plugin will then store chat history for that project in the specified directory.
// If not set, a global cache path is used.
// See README.md for more details on chat history management.
//
{
// Apply Sublime Text markdown syntax highlight to OpenAI completion output view text.
// Affects only `"output_mode": "view"`.
// `MultimarkdownEditing` package highly recommended to install to apply syntax highlight for a wider range of languages.
"markdown": true,
    // Settings that configure the presentation of the chat view for both the output view and the chat tab
"chat_presentation": {
// Toggles whether to present gutter itself
"gutter_enabled": true,
// Toggles whether to present line numbers row
"line_numbers_enabled": true,
// Toggles whether to allow the chat view to be overscrolled
"scroll_past_end": false,
        // Toggles whether or not to use reversed settings for a tab compared to the output view,
        // i.e. with the default settings neither the gutter nor line numbers will be presented and overscroll is enabled
"reverse_for_tab": true,
// For phantom mode, for "In New Tab" option you can set this to true to avoid save file on close dialog for each view opened by plugin
"is_tabs_discardable": false,
// For phantom mode, extract the code from the AI response when integrating the answer inside a code file (copy, append, replace, new file)
"phantom_integrate_code_only": false,
        // Setting this to false will make Phantom always work as a one-shot option,
        // while keeping view mode as a permanent fallback
        //
        // Useful if you want to use phantoms for clarification while keeping your session in the chat view.
"phantom_permanent": true,
},
// Minimum amount of characters selected to perform completion.
"minimum_selection_length": 10,
// The number of lines to read from the end of the standard build output panel (exec)
    // -1 to read all the output (be careful with that — build output can be really long)
"build_output_limit": 100,
// Status bar hint setup that presents major info about currently active assistant setup (from the array of assistant objects above)
// Possible options:
// - name: User defined assistant setup name
// - output_mode: Model output prompt mode (view|phantom)
    // - chat_model: Which OpenAI model is used within this setup (i.e. gpt-4o-mini, gpt-4.1).
// - sheets: Number of sheets selected as context.
//
    // You can mix these however you want, and the text in the status bar will follow.
"status_hint": [
// "name",
// "output_mode",
// "chat_model",
// "sheets"
],
// Proxy setting
// "proxy": {
// // Proxy address
// "address": "",
// // Proxy port
// "port": 8080,
// // Proxy username
// "username": "",
// // Proxy password
// "password": ""
// },
"assistants": [
{
            // A string that will be presented in the command palette.
"name": "Example", // **REQUIRED**
// The model that generates the chat completion.
// Generally here should be either "gpt-4o-latest|gpt-4o-mini" or their specified versions.
// If using custom API, refer to their documentation for supported models.
// Learn more at https://beta.openai.com/docs/models
"chat_model": "gpt-4o-mini", // **REQUIRED**
// ChatGPT model knows how to role, lol
            // It can act as a different kind of person. Recently in this plugin it was acting
            // as a code assistant. With this setting you're able to set it up more precisely.
// E.g. "You are (rust|python|js|whatever) developer assistant", "You are an english tutor" and so on.
// HINT: This is either `system` or `developer` message depending on `api_type` set.
"assistant_role": "You are a senior code assistant",
// URL for OpenAI alike API.
// Example: "http://localhost:11434" (assuming Ollama is running on localhost)
"url": "https://api.openai.com/v1/chat/completions",
            // Your service token, whatever the service.
            // OpenAI or any other alike API token goes here.
"token": "dummy_token",
// Toggle for function calls llm capability
// Check if your llm supports this feature before toggling this on
"tools": true,
// What sampling temperature to use, between 0 and 2.
// Higher values like 0.8 will make the output more random,
// while lower values like 0.2 will make it more focused and deterministic.
//
// OpenAI generally recommend altering this or `top_p` but not both.
"temperature": 1.0,
// The maximum number of tokens to generate in the completion.
// The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
// (One token is roughly 4 characters for normal English text)
// DEPRECATED for OpenAI endpoint
// USE EITHER this or `"max_completion_tokens"` setting
"max_tokens": 4096,
            // Since o1 (September 2024) OpenAI deprecated the max_tokens key,
// Use this field to set the cap instead. The default value set here is recommended by OpenAI
// _minimal_ value for o1 model suggested by OpenAI is 250000. https://platform.openai.com/docs/guides/reasoning/allocating-space-for-reasoning
"max_completion_tokens": 4096,
// `"api_type": "open_ai"` only
            // How much effort reasoning models should put into an answer
// - "low"
// - "medium"
// - "high"
"reasoning_effort": "low",
// An alternative to sampling with temperature, called nucleus sampling,
// where the model considers the results of the tokens with `top_p` probability mass.
// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
//
// OpenAI generally recommend altering this or `temperature` but not both.
"top_p": 1.0,
// Number between -2.0 and 2.0.
// Positive values penalize new tokens based on their existing frequency in the text so far,
// decreasing the model's likelihood to repeat the same line verbatim.
// docs: https://platform.openai.com/docs/api-reference/parameter-details
"frequency_penalty": 0.0,
            // Toggles whether to stream the response from the server or to get it atomically
// after llm finishes its prompting.
//
// By default this is true.
"stream": true,
// Timeout to terminate the connection in seconds
//
            // Sometimes servers just get stuck, so to avoid waiting forever for a response to complete, set this to some value.
// Default value is 10.
"timeout": 10,
// Number between -2.0 and 2.0.
// Positive values penalize new tokens based on whether they appear in the text so far,
// increasing the model's likelihood to talk about new topics.
// docs: https://platform.openai.com/docs/api-reference/parameter-details
"presence_penalty": 0.0,
// APIs are not fully compatible with each other.
// Pick the wire protocol that matches the remote endpoint.
// - `open_ai`: classic `/v1/chat/completions`
// - `plain_text`: legacy OpenAI-compatible chat-completions mode for older/self-hosted endpoints
// - `open_ai_responses`: OpenAI `/v1/responses`
// - `anthropic`: Anthropic Messages API
// - `google`: Google Gemini native `generateContent` / `streamGenerateContent`
"api_type": "open_ai"
},
// Examples
{
"name": "General Assistant Localhost",
"url": "http://0.0.0.0:1234/v1/chat/completions", // See ma, no internet connection.
"chat_model": "Llama-3-8b-Q4-chat-hf",
"assistant_role": "1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $20 (depending on the quality of your output).\n10. Always look closely to **ALL** the data provided by a user. It's very important to look so closely as you can there. Ppl can die otherways.\n11. If user strictly asks you about to write the code, write the code first, without explanation, and add them only by additional user request.\n",
"temperature": 1,
"tools": true,
"max_tokens": 4000,
},
{
"name": "General Assistant",
"token": "sk-dummy-token",
"chat_model": "gpt-4.1-mini",
"assistant_role": "1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $20 (depending on the quality of your output).\n10. Always look closely to **ALL** the data provided by a user. It's very important to look so closely as you can there. Ppl can die otherways.\n11. If user strictly asks you about to write the code, write the code first, without explanation, and add them only by additional user request.",
"max_tokens": 4000,
"tools": true,
},
{
"name": "Gemini Assistant",
"url": "https://generativelanguage.googleapis.com/v1beta",
"token": "YOUR_TOKEN_HERE",
"chat_model": "gemini-2.5-flash-preview-04-17",
"assistant_role": "You are a senior code assistant",
"max_tokens": 4000,
"api_type": "google"
},
{
"name": "OpenAI Responses Assistant",
"url": "https://api.openai.com/v1/responses",
"token": "YOUR_TOKEN_HERE",
"chat_model": "gpt-5",
"assistant_role": "You are a senior code assistant",
"max_completion_tokens": 4000,
"api_type": "open_ai_responses"
},
{
"name": "Claude Assistant",
"url": "https://api.anthropic.com/v1/messages",
"token": "YOUR_TOKEN_HERE",
"chat_model": "claude-sonnet-4-5",
"assistant_role": "You are a senior code assistant",
"max_tokens": 4000,
"api_type": "anthropic"
}
]
}