Skip to content

Commit 3793194

Browse files
committed
wip+feat: working on language detection, detect language for filename marking
1 parent 2cebf58 commit 3793194

18 files changed

+2104
-591
lines changed

CHANGELOG.md

Lines changed: 0 additions & 13 deletions
This file was deleted.

README.md

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,20 @@ Currently Llama Coder supports only Codellama. Model is quantized in different w
4242
* m - slow on MacOS
4343
* g - slow on older NVidia cards (pre 30xx)
4444

45-
## License
45+
## Changelog
4646

47-
MIT
47+
## [0.0.8]
48+
- Improved DeepSeek support and language detection
49+
50+
## [0.0.7]
51+
- Added DeepSeek support
52+
- Ability to change temperature and top p
53+
- Fixed some bugs
54+
55+
## [0.0.6]
56+
- Fixed Ollama links
57+
- Added more models
58+
59+
## [0.0.4]
60+
61+
- Initial release of Llama Coder

jest.config.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
2+
module.exports = {
3+
preset: 'ts-jest',
4+
testEnvironment: 'node',
5+
testPathIgnorePatterns: ["/node_modules/","/out/"],
6+
setupFiles: ['./jest.setup.js']
7+
};

jest.setup.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
require('dotenv').config();

package.json

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "llama-coder",
33
"displayName": "Llama Coder",
44
"description": "Better and self-hosted Github Copilot replacement",
5-
"version": "0.0.7",
5+
"version": "0.0.8",
66
"icon": "icon.png",
77
"publisher": "ex3ndr",
88
"repository": {
@@ -93,19 +93,19 @@
9393
"watch": "tsc -watch -p ./",
9494
"pretest": "yarn run compile && yarn run lint",
9595
"lint": "eslint src --ext ts",
96-
"test": "node ./out/test/runTest.js",
96+
"test": "jest",
9797
"package": "vsce package"
9898
},
9999
"devDependencies": {
100-
"@types/vscode": "^1.84.0",
101-
"@types/mocha": "^10.0.3",
100+
"@types/jest": "^29.5.10",
102101
"@types/node": "18.x",
102+
"@types/vscode": "^1.84.0",
103103
"@typescript-eslint/eslint-plugin": "^6.9.0",
104104
"@typescript-eslint/parser": "^6.9.0",
105+
"dotenv": "^16.3.1",
105106
"eslint": "^8.52.0",
106-
"glob": "^10.3.10",
107-
"mocha": "^10.2.0",
108-
"typescript": "^5.2.2",
109-
"@vscode/test-electron": "^2.3.6"
107+
"jest": "^29.7.0",
108+
"ts-jest": "^29.1.1",
109+
"typescript": "^5.2.2"
110110
}
111-
}
111+
}

src/prompts/adaptors/adaptPrompt.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
1-
export function adaptPrompt(args: { model: string, prefix: string, suffix: string }): { prompt: string, stop: string[] } {
1+
export function adaptPrompt(args: { model: string, prefix: string, suffix: string | null }): { prompt: string, stop: string[] } {
22

3-
// Starcoder format
4-
if (args.model.startsWith('deepseek-coder')) {
5-
6-
if (args.suffix.length < 1000) {
7-
return {
8-
prompt: args.prefix,
9-
stop: [`<END>`]
10-
};
11-
}
3+
// Common non-FIM mode: without a suffix, send the prefix as a plain completion prompt
4+
if (!args.suffix) {
5+
return {
6+
prompt: args.prefix,
7+
stop: [`<END>`]
8+
};
9+
}
1210

11+
// DeepSeek Coder FIM
12+
if (args.model.startsWith('deepseek-coder')) {
1313
return {
1414
prompt: `<|fim▁begin|>${args.prefix}<|fim▁hole|>${args.suffix}<|fim▁end|>`,
1515
stop: [`<|fim▁begin|>`, `<|fim▁hole|>`, `<|fim▁end|>`, `<END>`]
1616
};
1717
}
1818

19-
// Codellama format
19+
// Codellama FIM
2020
return {
2121
prompt: `<PRE> ${args.prefix} <SUF>${args.suffix} <MID>`,
2222
stop: [`<PRE>`, `<SUF>`, `<MID>`, `<END>`]

src/prompts/autocomplete.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ export async function autocomplete(args: {
77
endpoint: string,
88
model: string,
99
prefix: string,
10-
suffix: string,
10+
suffix: string | null,
1111
maxLines: number,
1212
maxTokens: number,
1313
temperature: number,

src/prompts/preparePrompt.ts

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,31 @@
11
import vscode from 'vscode';
2-
import path from 'path';
2+
import { detectLanguage } from './processors/detectLanguage';
3+
import { fileHeaders } from './processors/fileHeaders';
4+
import { languages } from './processors/languages';
35

46
export async function preparePrompt(document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext) {
57

68
// Load document text
79
let text = document.getText();
810
let offset = document.offsetAt(position);
911
let prefix = text.slice(0, offset);
10-
let suffix = text.slice(offset);
12+
let suffix: string | null = text.slice(offset);
1113

1214
// Trim suffix
13-
// NOTE: It seems that most neural networks are built have a focus on last characters and we therefore need to trim them to not get weird results.
14-
// TODO: Better solution?
15-
// TODO: Am i right here? What if we would want to generate something that uses something in the end of the file?
16-
if (suffix.length > 256) {
17-
suffix = suffix.slice(0, 256);
15+
// If the suffix is too short, it is safe to assume it can be ignored, which allows us to use
16+
// the more powerful plain completion instead of the fill-in-the-middle one
17+
if (suffix.length < 256) {
18+
suffix = null;
1819
}
1920

2021
// Add filename and language to prefix
2122
// NOTE: Most networks don't have a concept of filenames and expected language, but we expect that some files in the training set have something in their header that
2223
// would indicate filename and language
23-
// NOTE: We are building for typescript for now so we can use C-style comments to indicate filename
24-
let filename = path.basename(document.fileName);
25-
let language = document.languageId;
26-
let filenamePrefix = `/* ${language}, filename: ${filename} */`;
27-
prefix = filenamePrefix + '\n' + prefix;
24+
// NOTE: If we can't detect language, we could ignore this since the number of languages that need detection is limited
25+
let language = detectLanguage(document.uri.fsPath, document.languageId);
26+
if (language) {
27+
prefix = fileHeaders(prefix, document.uri.fsPath, languages[language]);
28+
}
2829

2930
return {
3031
prefix,

src/prompts/processors/comment.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import { LanguageDescriptor } from "./languages";
2+
3+
export function comment(text: string, language: LanguageDescriptor): string | null {
4+
if (language.comment) {
5+
if (language.comment.end) {
6+
return `${language.comment.start} ${text} ${language.comment.end}`;
7+
} else {
8+
return `${language.comment.start} ${text}`;
9+
}
10+
}
11+
return null;
12+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { detectLanguage } from './detectLanguage';
2+
3+
describe('detectLanguage', () => {
4+
it('should detect language from happy path', () => {
5+
6+
});
7+
});

0 commit comments

Comments
 (0)