diff --git a/package.json b/package.json index ae247ee..1767626 100644 --- a/package.json +++ b/package.json @@ -83,37 +83,6 @@ }, "inference.model": { "type": "string", - "enum": [ - "stable-code:3b-code-q4_0", - "codellama:7b-code-q4_K_S", - "codellama:7b-code-q4_K_M", - "codellama:7b-code-q6_K", - "codellama:7b-code-fp16", - "codellama:13b-code-q4_K_S", - "codellama:13b-code-q4_K_M", - "codellama:13b-code-q6_K", - "codellama:13b-code-fp16", - "codellama:34b-code-q4_K_S", - "codellama:34b-code-q4_K_M", - "codellama:34b-code-q6_K", - "codellama:70b-code-q4_K_S", - "codellama:70b-code-q4_K_M", - "codellama:70b-code-q6_K", - "codellama:70b-code-fp16", - "deepseek-coder:1.3b-base-q4_0", - "deepseek-coder:1.3b-base-q4_1", - "deepseek-coder:1.3b-base-q8_0", - "deepseek-coder:6.7b-base-q4_K_S", - "deepseek-coder:6.7b-base-q4_K_M", - "deepseek-coder:6.7b-base-q5_K_S", - "deepseek-coder:6.7b-base-q5_K_M", - "deepseek-coder:6.7b-base-q8_0", - "deepseek-coder:6.7b-base-fp16", - "deepseek-coder:33b-base-q4_K_S", - "deepseek-coder:33b-base-q4_K_M", - "deepseek-coder:33b-base-fp16", - "custom" - ], "default": "stable-code:3b-code-q4_0", "description": "Inference model to use", "order": 2 @@ -124,23 +93,6 @@ "description": "Temperature of the model. Increasing the temperature will make the model answer more creatively.", "order": 3 }, - "inference.custom.model": { - "type": "string", - "default": "", - "description": "Custom model name", - "order": 4 - }, - "inference.custom.format": { - "type": "string", - "enum": [ - "stable-code", - "codellama", - "deepseek" - ], - "default": "stable-code", - "description": "Custom model prompt format", - "order": 5 - }, "inference.maxLines": { "type": "number", "default": 16, diff --git a/src/config.ts b/src/config.ts index a6294b1..0cdafd8 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,5 +1,4 @@ import vscode from 'vscode'; -import { ModelFormat } from './prompts/processors/models'; class Config { @@ -24,17 +23,6 @@ class Config { // Load model let modelName = config.get('model') as string; - let modelFormat: ModelFormat = 'codellama'; - if (modelName === 'custom') { - modelName = config.get('custom.model') as string; - modelFormat = config.get('cutom.format') as ModelFormat; - } else { - if (modelName.startsWith('deepseek-coder')) { - modelFormat = 'deepseek'; - } else if (modelName.startsWith('stable-code')) { - modelFormat = 'stable-code'; - } - } let delay = config.get('delay') as number; @@ -45,7 +33,6 @@ class Config { maxTokens, temperature, modelName, - modelFormat, delay }; } diff --git a/src/modules/lineGenerator.ts b/src/modules/lineGenerator.ts deleted file mode 100644 index f20081f..0000000 --- a/src/modules/lineGenerator.ts +++ /dev/null @@ -1,54 +0,0 @@ -export async function* lineGenerator(url: string, data: any, bearerToken: string): AsyncGenerator { - // Request - const controller = new AbortController(); - let res = await fetch(url, { - method: 'POST', - body: JSON.stringify(data), - headers: bearerToken ? 
{ - 'Content-Type': 'application/json', - Authorization: `Bearer ${bearerToken}`, - } : { - 'Content-Type': 'application/json', - }, - signal: controller.signal, - }); - if (!res.ok || !res.body) { - throw Error('Unable to connect to backend'); - } - - // Reading stream - let stream = res.body.getReader(); - const decoder = new TextDecoder(); - let pending: string = ''; - try { - while (true) { - const { done, value } = await stream.read(); - - // If ended - if (done) { - if (pending.length > 0) { // New lines are impossible here - yield pending; - } - break; - } - - // Append chunk - let chunk = decoder.decode(value); - console.warn(chunk); - pending += chunk; - - // Yield results - while (pending.indexOf('\n') >= 0) { - let offset = pending.indexOf('\n'); - yield pending.slice(0, offset); - pending = pending.slice(offset + 1); - } - } - } finally { - stream.releaseLock(); - if (!stream.closed) { // Stop generation - await stream.cancel(); - } - controller.abort(); - } -} \ No newline at end of file diff --git a/src/modules/ollamaCheckModel.ts b/src/modules/ollamaCheckModel.ts deleted file mode 100644 index 5aedb8f..0000000 --- a/src/modules/ollamaCheckModel.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { info } from "./log"; - -export async function ollamaCheckModel(endpoint: string, model: string, bearerToken: string) { - // Check if exists - let res = await fetch(endpoint + '/api/tags', { - headers: bearerToken ? { - Authorization: `Bearer ${bearerToken}`, - } : {}, - }); - if (!res.ok) { - info(await res.text()); - info(endpoint + '/api/tags'); - throw Error('Network response was not ok.'); - } - let body = await res.json() as { models: { name: string }[] }; - if (body.models.find((v) => v.name === model)) { - return true; - } else { - return false; - } -} \ No newline at end of file diff --git a/src/modules/ollamaDownloadModel.ts b/src/modules/ollamaDownloadModel.ts deleted file mode 100644 index 7e6eccb..0000000 --- a/src/modules/ollamaDownloadModel.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { lineGenerator } from "./lineGenerator"; -import { info } from "./log"; - -export async function ollamaDownloadModel(endpoint: string, model: string, bearerToken: string) { - info('Downloading model from ollama: ' + model); - for await (let line of lineGenerator(endpoint + '/api/pull', { name: model }, bearerToken)) { - info('[DOWNLOAD] ' + line); - } -} \ No newline at end of file diff --git a/src/modules/ollamaRequest.ts b/src/modules/ollamaRequest.ts new file mode 100644 index 0000000..cbf8d93 --- /dev/null +++ b/src/modules/ollamaRequest.ts @@ -0,0 +1,35 @@ +export async function makeOllamaRequest(url: string, data: any, bearerToken: string): Promise { + // Request + const controller = new AbortController(); + let res = await fetch(url, { + method: 'POST', + body: JSON.stringify(data), + headers: bearerToken ? 
{ + 'Content-Type': 'application/json', + Authorization: `Bearer ${bearerToken}`, + } : { + 'Content-Type': 'application/json', + }, + signal: controller.signal, + }); + if (!res.ok || !res.body) { + throw Error('Unable to connect to backend'); + } + + // Reading stream + let stream = res.body.getReader(); + const decoder = new TextDecoder(); + try { + const { value } = await stream.read(); + + // Append chunk + let chunk = decoder.decode(value); + return chunk; + } finally { + stream.releaseLock(); + if (!stream.closed) { // Stop generation + await stream.cancel(); + } + controller.abort(); + } +} \ No newline at end of file diff --git a/src/modules/ollamaTokenGenerator.ts b/src/modules/ollamaTokenGenerator.ts deleted file mode 100644 index f57fef9..0000000 --- a/src/modules/ollamaTokenGenerator.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { lineGenerator } from "./lineGenerator"; -import { info } from "./log"; - -export type OllamaToken = { - model: string, - response: string, - done: boolean -}; - -export async function* ollamaTokenGenerator(url: string, data: any, bearerToken: string): AsyncGenerator { - for await (let line of lineGenerator(url, data, bearerToken)) { - info('Receive line: ' + line); - let parsed: OllamaToken; - try { - parsed = JSON.parse(line) as OllamaToken; - } catch (e) { - console.warn('Receive wrong line: ' + line); - continue; - } - yield parsed; - } -} \ No newline at end of file diff --git a/src/prompts/autocomplete.ts b/src/prompts/autocomplete.ts index b6a8089..3415d17 100644 --- a/src/prompts/autocomplete.ts +++ b/src/prompts/autocomplete.ts @@ -1,13 +1,14 @@ -import { ollamaTokenGenerator } from '../modules/ollamaTokenGenerator'; -import { countSymbol } from '../modules/text'; -import { info } from '../modules/log'; -import { ModelFormat, adaptPrompt } from './processors/models'; +import { makeOllamaRequest } from "../modules/ollamaRequest"; + +type OllamaToken = { + model: string, + response: string, +}; export async function autocomplete(args: { endpoint: string, bearerToken: string, model: string, - format: ModelFormat, prefix: string, suffix: string, maxLines: number, @@ -16,88 +17,33 @@ export async function autocomplete(args: { canceled?: () => boolean, }): Promise { - let prompt = adaptPrompt({ prefix: args.prefix, suffix: args.suffix, format: args.format }); - // Calculate arguments let data = { model: args.model, - prompt: prompt.prompt, + prompt: args.prefix, + suffix: args.suffix, raw: true, + stream: false, options: { - stop: prompt.stop, num_predict: args.maxTokens, temperature: args.temperature } }; - // Receiving tokens - let res = ''; - let totalLines = 1; - let blockStack: ('[' | '(' | '{')[] = []; - outer: for await (let tokens of ollamaTokenGenerator(args.endpoint + '/api/generate', data, args.bearerToken)) { + const res = await makeOllamaRequest(args.endpoint + '/api/generate', data, args.bearerToken); + try { + const tokens = JSON.parse(res) as OllamaToken; if (args.canceled && args.canceled()) { - break; - } - - // Block stack - for (let c of tokens.response) { - - // Open block - if (c === '[') { - blockStack.push('['); - } else if (c === '(') { - blockStack.push('('); - } - if (c === '{') { - blockStack.push('{'); - } - - // Close block - if (c === ']') { - if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '[') { - blockStack.pop(); - } else { - info('Block stack error, breaking.'); - break outer; - } - } - if (c === ')') { - if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '(') { - blockStack.pop(); 
- } else { - info('Block stack error, breaking.'); - break outer; - } - } - if (c === '}') { - if (blockStack.length > 0 && blockStack[blockStack.length - 1] === '{') { - blockStack.pop(); - } else { - info('Block stack error, breaking.'); - break outer; - } - } - - // Append charater - res += c; - } - - // Update total lines - totalLines += countSymbol(tokens.response, '\n'); - // Break if too many lines and on top level - if (totalLines > args.maxLines && blockStack.length === 0) { - info('Too many lines, breaking.'); - break; + return ""; } + const response = tokens.response; + + // take only args.maLines lines from the response + let lines = response.split('\n'); + lines = lines.slice(0, args.maxLines); + return lines.join('\n'); + } catch (e) { + console.warn('Receive wrong line: ' + res); + return ""; } - - // Remove - if (res.endsWith('')) { - res = res.slice(0, res.length - 5); - } - - // Trim ends of all lines since sometimes the AI completion will add extra spaces - res = res.split('\n').map((v) => v.trimEnd()).join('\n'); - - return res; } \ No newline at end of file diff --git a/src/prompts/processors/models.ts b/src/prompts/processors/models.ts deleted file mode 100644 index 058905f..0000000 --- a/src/prompts/processors/models.ts +++ /dev/null @@ -1,34 +0,0 @@ -export type ModelFormat = 'codellama' | 'deepseek' | 'stable-code'; - -export function adaptPrompt(args: { format: ModelFormat, prefix: string, suffix: string }): { prompt: string, stop: string[] } { - - // Common non FIM mode - // if (!args.suffix) { - // return { - // prompt: args.prefix, - // stop: [``] - // }; - // } - - // Starcoder FIM - if (args.format === 'deepseek') { - return { - prompt: `<|fim▁begin|>${args.prefix}<|fim▁hole|>${args.suffix}<|fim▁end|>`, - stop: [`<|fim▁begin|>`, `<|fim▁hole|>`, `<|fim▁end|>`, ``] - }; - } - - // Stable code FIM - if (args.format === 'stable-code') { - return { - prompt: `${args.prefix}${args.suffix}`, - stop: [`<|endoftext|>`] - }; - } - - // Codellama FIM - return { - prompt: `
<PRE> ${args.prefix} <SUF>${args.suffix} <MID>`,
-        stop: [`<END>`, `<EOD>`, `</s>`]
-    };
-}
\ No newline at end of file
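
Note: with adaptPrompt and the per-model FIM templates above removed, fill-in-the-middle formatting is delegated to Ollama itself: the new autocomplete() sends the raw prefix and suffix in the request body and lets the server apply the model's own template. A minimal sketch of the resulting non-streaming call, reusing makeOllamaRequest from the new src/modules/ollamaRequest.ts (the endpoint URL, import path, and option values below are illustrative, not part of the diff; the model name is the package.json default):

    import { makeOllamaRequest } from './modules/ollamaRequest';

    async function completeOnce(prefix: string, suffix: string): Promise<string> {
        // Same request shape as the data object built in the new autocomplete():
        // a single JSON response with prefix/suffix instead of a hand-built FIM prompt.
        const body = {
            model: 'stable-code:3b-code-q4_0', // default from package.json
            prompt: prefix,
            suffix: suffix,
            raw: true,
            stream: false,
            options: { num_predict: 256, temperature: 0.2 }, // illustrative values
        };
        const res = await makeOllamaRequest('http://127.0.0.1:11434/api/generate', body, '');
        return (JSON.parse(res) as { response: string }).response;
    }
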
diff --git a/src/prompts/promptCache.ts b/src/prompts/promptCache.ts
deleted file mode 100644
index d4b4d73..0000000
--- a/src/prompts/promptCache.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-
-// Remove all newlines, double spaces, etc
-function normalizeText(src: string) {
-    src = src.split('\n').join(' ');
-    src = src.replace(/\s+/gm, ' ');
-    return src;
-}
-
-function extractPromptCacheKey(args: { prefix: string, suffix: string | null }) {
-    if (args.suffix) {
-        return normalizeText(args.prefix + ' ##CURSOR## ' + args.suffix);
-    } else {
-        return normalizeText(args.prefix);
-    }
-}
-
-// TODO: make it LRU
-let cache: { [key: string]: string | null } = {};
-
-export function getFromPromptCache(args: { prefix: string, suffix: string | null }): string | undefined | null {
-    const key = extractPromptCacheKey(args);
-    return cache[key];
-}
-
-export function setPromptToCache(args: { prefix: string, suffix: string | null, value: string | null }) {
-    const key = extractPromptCacheKey(args);
-    cache[key] = args.value;
-}
\ No newline at end of file
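
Note: the deleted cache keyed completions on a whitespace-normalized prefix, a ##CURSOR## marker, and the suffix, and carried a "TODO: make it LRU". If caching is ever reintroduced, a bounded LRU drop-in with the same keying and the same get/set signatures could look like this sketch (the 256-entry cap is an arbitrary choice, not from the diff):

    const MAX_ENTRIES = 256; // arbitrary cap, tune as needed

    // Same keying as the removed promptCache.ts: collapse whitespace and mark the cursor position.
    function cacheKey(args: { prefix: string, suffix: string | null }): string {
        const joined = args.suffix ? args.prefix + ' ##CURSOR## ' + args.suffix : args.prefix;
        return joined.replace(/\s+/gm, ' ');
    }

    // A Map iterates in insertion order, so re-inserting a key on every hit keeps it "most recent".
    const cache = new Map<string, string | null>();

    export function getFromPromptCache(args: { prefix: string, suffix: string | null }): string | undefined | null {
        const key = cacheKey(args);
        if (!cache.has(key)) {
            return undefined; // miss: caller should run the completion
        }
        const value = cache.get(key) as string | null;
        cache.delete(key);
        cache.set(key, value); // refresh recency
        return value;
    }

    export function setPromptToCache(args: { prefix: string, suffix: string | null, value: string | null }) {
        const key = cacheKey(args);
        cache.delete(key);
        cache.set(key, args.value);
        if (cache.size > MAX_ENTRIES) {
            // Evict the least recently used entry (first in insertion order).
            const oldest = cache.keys().next().value as string;
            cache.delete(oldest);
        }
    }
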
diff --git a/src/prompts/provider.ts b/src/prompts/provider.ts
index ed4be76..4d9c655 100644
--- a/src/prompts/provider.ts
+++ b/src/prompts/provider.ts
@@ -3,10 +3,7 @@ import { info, warn } from '../modules/log';
 import { autocomplete } from './autocomplete';
 import { preparePrompt } from './preparePrompt';
 import { AsyncLock } from '../modules/lock';
-import { getFromPromptCache, setPromptToCache } from './promptCache';
 import { isNotNeeded, isSupported } from './filter';
-import { ollamaCheckModel } from '../modules/ollamaCheckModel';
-import { ollamaDownloadModel } from '../modules/ollamaDownloadModel';
 import { config } from '../config';
 
 type Status = {
@@ -105,85 +102,33 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
                 // Result
                 let res: string | null = null;
 
-                // Check if in cache
-                let cached = getFromPromptCache({
-                    prefix: prepared.prefix,
-                    suffix: prepared.suffix
-                });
-
-                // If not cached
-                if (cached === undefined) {
-
-                    // Config
-                    let inferenceConfig = config.inference;
-
-                    // Update status
-                    this.update('sync~spin', 'Llama Coder');
-                    try {
-
-                        // Check model exists
-                        let modelExists = await ollamaCheckModel(inferenceConfig.endpoint, inferenceConfig.modelName, inferenceConfig.bearerToken);
-                        if (token.isCancellationRequested) {
-                            info(`Canceled after AI completion.`);
-                            return;
-                        }
-
-                        // Download model if not exists
-                        if (!modelExists) {
-
-                            // Check if user asked to ignore download
-                            if (this.context.globalState.get('llama-coder-download-ignored') === inferenceConfig.modelName) {
-                                info(`Ignoring since user asked to ignore download.`);
-                                return;
-                            }
-
-                            // Ask for download
-                            let download = await vscode.window.showInformationMessage(`Model ${inferenceConfig.modelName} is not downloaded. Do you want to download it? Answering "No" would require you to manually download model.`, 'Yes', 'No');
-                            if (download === 'No') {
-                                info(`Ignoring since user asked to ignore download.`);
-                                this.context.globalState.update('llama-coder-download-ignored', inferenceConfig.modelName);
-                                return;
-                            }
-
-                            // Perform download
-                            this.update('sync~spin', 'Downloading');
-                            await ollamaDownloadModel(inferenceConfig.endpoint, inferenceConfig.modelName, inferenceConfig.bearerToken);
-                            this.update('sync~spin', 'Llama Coder')
-                        }
-                        if (token.isCancellationRequested) {
-                            info(`Canceled after AI completion.`);
-                            return;
-                        }
-
-                        // Run AI completion
-                        info(`Running AI completion...`);
-                        res = await autocomplete({
-                            prefix: prepared.prefix,
-                            suffix: prepared.suffix,
-                            endpoint: inferenceConfig.endpoint,
-                            bearerToken: inferenceConfig.bearerToken,
-                            model: inferenceConfig.modelName,
-                            format: inferenceConfig.modelFormat,
-                            maxLines: inferenceConfig.maxLines,
-                            maxTokens: inferenceConfig.maxTokens,
-                            temperature: inferenceConfig.temperature,
-                            canceled: () => token.isCancellationRequested,
-                        });
-                        info(`AI completion completed: ${res}`);
-
-                        // Put to cache
-                        setPromptToCache({
-                            prefix: prepared.prefix,
-                            suffix: prepared.suffix,
-                            value: res
-                        });
-                    } finally {
-                        this.update('chip', 'Llama Coder');
-                    }
-                } else {
-                    if (cached !== null) {
-                        res = cached;
+                // Config
+                let inferenceConfig = config.inference;
+
+                // Update status
+                this.update('sync~spin', 'Llama Coder');
+                try {
+                    if (token.isCancellationRequested) {
+                        info(`Canceled before AI completion.`);
+                        return;
                     }
+
+                    // Run AI completion
+                    info(`Running AI completion...`);
+                    res = await autocomplete({
+                        prefix: prepared.prefix,
+                        suffix: prepared.suffix,
+                        endpoint: inferenceConfig.endpoint,
+                        bearerToken: inferenceConfig.bearerToken,
+                        model: inferenceConfig.modelName,
+                        maxLines: inferenceConfig.maxLines,
+                        maxTokens: inferenceConfig.maxTokens,
+                        temperature: inferenceConfig.temperature,
+                        canceled: () => token.isCancellationRequested,
+                    });
+                    info(`AI completion completed: ${res}`);
+                } finally {
+                    this.update('chip', 'Llama Coder');
                 }
                 if (token.isCancellationRequested) {
                     info(`Canceled after AI completion.`);