Skip to content

Commit f7b3daa

Browse files
committed
translate in splits for token count larger than 4096
1 parent 30b34a2 commit f7b3daa

File tree

2 files changed

+95
-43
lines changed

2 files changed

+95
-43
lines changed
Lines changed: 87 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import time
22

33
import openai
4+
from ..utils import num_tokens_from_messages
45

56
from .base_translator import Base
67

@@ -18,45 +19,91 @@ def rotate_key(self):
1819
def _chat_completion(self, prompt):
    """One ChatCompletion round trip; returns the decoded reply text.

    Extracted so the three previously-duplicated API-call blocks share
    a single implementation.  May raise whatever openai raises (rate
    limit, network error); callers decide whether to retry.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "user",
                # english prompt here to save tokens
                "content": prompt,
            }
        ],
    )
    return (
        completion["choices"][0]
        .get("message")
        .get("content")
        .encode("utf8")
        .decode()
    )

def translate(self, text):
    """Translate *text* into ``self.language`` using gpt-3.5-turbo.

    Prompts whose token count exceeds 4000 are split into sentence
    chunks (on ".") and translated piece by piece, because the model's
    context window is 4096 tokens; the translated chunks are then
    concatenated and returned.
    """
    print(text)
    self.rotate_key()
    message_log = [
        {
            "role": "user",
            # english prompt here to save tokens
            "content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
        }
    ]
    count_tokens = num_tokens_from_messages(message_log)
    t_text = ""
    if count_tokens > 4000:
        print("too long!")

        splits = count_tokens // 4000 + 1

        text_list = text.split(".")
        # BUG FIX: the original sliced text_list[n * splits:(n + 1) * splits],
        # using the number of splits as the chunk size, so only
        # splits * splits sentences were ever translated and the rest of
        # the text was silently dropped.  Derive the real per-chunk
        # sentence count so every sentence is covered.
        chunk_size = len(text_list) // splits + 1
        for n in range(splits):
            text_segment = text_list[n * chunk_size : (n + 1) * chunk_size]
            if not text_segment:
                # len(text_list) may not divide evenly; skip empty tail slots
                continue
            sub_text = ".".join(text_segment)
            print(sub_text)

            t_sub_text = self._chat_completion(
                f"Please help me to translate,`{sub_text}` to {self.language}, please return only translated content not include the origin text"
            )
            print(t_sub_text)

            t_text = t_text + t_sub_text
    else:
        prompt = f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text"
        try:
            t_text = self._chat_completion(prompt)
        except Exception as e:
            # TIME LIMIT for open api please pay
            key_len = self.key.count(",") + 1
            sleep_time = int(60 / key_len)
            time.sleep(sleep_time)
            print(e, f"will sleep {sleep_time} seconds")
            self.rotate_key()
            t_text = self._chat_completion(prompt)
    print(t_text)
    return t_text

book_maker/utils.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120

121121
import tiktoken
122122

123+
123124
def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
124125
"""Returns the number of tokens used by a list of messages."""
125126
try:
@@ -129,13 +130,17 @@ def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
129130
if model == "gpt-3.5-turbo": # note: future models may deviate from this
130131
num_tokens = 0
131132
for message in messages:
132-
num_tokens += 4 # every message follows <im_start>{role/name}\n{content}<im_end>\n
133+
num_tokens += (
134+
4 # every message follows <im_start>{role/name}\n{content}<im_end>\n
135+
)
133136
for key, value in message.items():
134137
num_tokens += len(encoding.encode(value))
135138
if key == "name": # if there's a name, the role is omitted
136139
num_tokens += -1 # role is always required and always 1 token
137140
num_tokens += 2 # every reply is primed with <im_start>assistant
138141
return num_tokens
139142
else:
140-
raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
141-
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
143+
raise NotImplementedError(
144+
f"""num_tokens_from_messages() is not presently implemented for model {model}.
145+
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
146+
)

0 commit comments

Comments
 (0)