Skip to content

Commit 2ef8359

Browse files
committed
improve translator
1 parent 23feddf commit 2ef8359

File tree

1 file changed

+20
-1
lines changed

1 file changed

+20
-1
lines changed

scripts/translator.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,9 +224,11 @@ def split_text(text, model):
224224
chunks = []
225225
chunk = ''
226226
in_code_block = False
227+
in_ref = False
227228

228229
for line in lines:
229-
# If we are in a code block, just add the code to the chunk
230+
231+
# Keep code blocks as one chunk
230232
if line.startswith('```'):
231233

232234
# If we are in a code block, finish it with the "```"
@@ -242,7 +244,24 @@ def split_text(text, model):
242244
chunk += line + '\n'
243245

244246
continue
247+
248+
"""
249+
Strip backticks from lines inside ref blocks, to prevent refs like:
250+
{{#ref}}
251+
../../generic-methodologies-and-resources/pentesting-network/`spoofing-llmnr-nbt-ns-mdns-dns-and-wpad-and-relay-attacks.md`
252+
{{#endref}}
253+
"""
254+
if line.startswith('{{#ref}}'):
255+
in_ref = True
256+
257+
if in_ref:
258+
line = line.replace("`", "")
259+
260+
if line.startswith('{{#endref}}'):
261+
in_ref = False
262+
245263

264+
# Check whether to split: at a new section heading once the chunk passes 80% of MAX_TOKENS, or on any line once it passes MAX_TOKENS
246265
if (line.startswith('#') and reportTokens(chunk + "\n" + line.strip(), model) > MAX_TOKENS*0.8) or \
247266
reportTokens(chunk + "\n" + line.strip(), model) > MAX_TOKENS:
248267

0 commit comments

Comments
 (0)