This repository was archived by the owner on Mar 6, 2024. It is now read-only.

Commit b6b5107

get tokens properly
1 parent 575bed8 commit b6b5107

4 files changed: 37 additions and 25 deletions (dist/index.js, src/bot.ts, src/review.ts, src/tokenizer.ts)

dist/index.js

Lines changed: 18 additions & 12 deletions
Some generated files are not rendered by default.

src/bot.ts

Lines changed: 4 additions & 2 deletions
@@ -8,6 +8,7 @@ import {
   SendMessageBrowserOptions,
   SendMessageOptions
 } from 'chatgpt'
+import * as tokenizer from './tokenizer'
 
 // define type to save parentMessageId and conversationId
 export type Ids = {
@@ -49,15 +50,16 @@ export class Bot {
   }
 
   chat = async (message: string, ids: Ids): Promise<[string, Ids]> => {
-    console.time(`chatgpt ${message.length} tokens cost`)
+    const tokens = tokenizer.get_token_count(message)
+    console.time(`chatgpt ${tokens} tokens cost`)
     let new_ids: Ids = {}
     let response = ''
     try {
       ;[response, new_ids] = await this.chat_(message, ids)
     } catch (e: any) {
       core.warning(`Failed to chat: ${e}, backtrace: ${e.stack}`)
     } finally {
-      console.timeEnd(`chatgpt ${message.length} tokens cost`)
+      console.timeEnd(`chatgpt ${tokens} tokens cost`)
       return [response, new_ids]
     }
   }
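
Note (not part of the commit): the change above swaps the character count (`message.length`) in the timer label for a real token count. Node's `console.time`/`console.timeEnd` pair timers by their exact label string, so both calls interpolate the same precomputed value; a minimal sketch of that pattern, with an illustrative literal in place of the computed count:

// Illustrative sketch: console.time and console.timeEnd match on the exact
// label string, so the same interpolated token count must appear in both.
const tokens = 42 // assumed to come from tokenizer.get_token_count(message)
console.time(`chatgpt ${tokens} tokens cost`)
// ... the ChatGPT request would run here ...
console.timeEnd(`chatgpt ${tokens} tokens cost`) // logs elapsed time for this label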

src/review.ts

Lines changed: 3 additions & 11 deletions
@@ -1,13 +1,10 @@
 import * as core from '@actions/core'
 import * as github from '@actions/github'
-import {get_encoding} from '@dqbd/tiktoken'
 import {Octokit} from '@octokit/action'
 import {Bot} from './bot.js'
 import {Commenter} from './commenter.js'
 import {Inputs, Options, Prompts} from './options.js'
-
-// TODO: make this configurable
-const tokenizer = get_encoding('cl100k_base')
+import * as tokenizer from './tokenizer'
 
 const token = core.getInput('token')
   ? core.getInput('token')
@@ -134,7 +131,7 @@ export const codeReview = async (
     next_review_ids = review_begin_ids
 
     if (file_content.length > 0) {
-      const file_content_tokens = await get_token_count(file_content)
+      const file_content_tokens = tokenizer.get_token_count(file_content)
       if (file_content_tokens < MAX_TOKENS_FOR_EXTRA_CONTENT) {
         // review file
         const [resp, review_file_ids] = await bot.chat(
@@ -154,7 +151,7 @@ export const codeReview = async (
      }
 
     if (file_diff.length > 0) {
-      const file_diff_tokens = await get_token_count(file_diff)
+      const file_diff_tokens = tokenizer.get_token_count(file_diff)
       if (file_diff_tokens < MAX_TOKENS_FOR_EXTRA_CONTENT) {
         // review diff
         const [resp, review_diff_ids] = await bot.chat(
@@ -328,8 +325,3 @@ const patch_comment_line = (patch: string): number => {
     return -1
   }
 }
-
-const get_token_count = async (text: string): Promise<number> => {
-  text = text.replace(/<\|endoftext\|>/g, '')
-  return tokenizer.encode(text).length
-}
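
The shared helper is synchronous, so the former `await get_token_count(...)` calls become plain function calls. A minimal, self-contained sketch of the gating pattern (the threshold value and sample content are assumptions for illustration; the real `MAX_TOKENS_FOR_EXTRA_CONTENT` is defined elsewhere in review.ts and not shown in this diff):

import * as tokenizer from './tokenizer'

// Hypothetical threshold for illustration only; the real constant lives in review.ts.
const MAX_TOKENS_FOR_EXTRA_CONTENT = 2500

const file_content = 'example file contents to be reviewed'
// No await: tokenizer.get_token_count is synchronous.
const file_content_tokens = tokenizer.get_token_count(file_content)
if (file_content_tokens < MAX_TOKENS_FOR_EXTRA_CONTENT) {
  // small enough to send the whole file as extra context for the review
}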

src/tokenizer.ts

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+import {get_encoding} from '@dqbd/tiktoken'
+
+const tokenizer = get_encoding('cl100k_base')
+
+export function encode(input: string): Uint32Array {
+  return tokenizer.encode(input)
+}
+
+export function get_token_count(input: string): number {
+  input = input.replace(/<\|endoftext\|>/g, '')
+  return encode(input).length
+}
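
A minimal usage sketch of the new module (the sample strings are illustrative; `encode` and `get_token_count` are the two exports added above):

import * as tokenizer from './tokenizer'

// get_token_count strips the <|endoftext|> special token before encoding,
// so only ordinary text contributes to the count.
const count = tokenizer.get_token_count('Review this change <|endoftext|> carefully.')
console.log(`token count: ${count}`)

// encode returns the raw cl100k_base token ids as a Uint32Array.
const ids = tokenizer.encode('hello world')
console.log(`first token id: ${ids[0]}, total tokens: ${ids.length}`)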

0 commit comments
