Skip to content

Commit 664b0e7

Browse files
committed
feat: Implement Telegram HTML support
1 parent 8eb1c66 commit 664b0e7

File tree

4 files changed

+154
-9
lines changed

4 files changed

+154
-9
lines changed

app/ui/src/components/Bot/Playground/Message.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ export const PlaygroundMessage = (props: Props) => {
5656
<Markdown message={props.message} />
5757
</div>
5858

59-
{props.isBot && (
59+
{props.isBot && props?.sources && props?.sources?.length > 0 && (
6060
<Collapse
6161
className="mt-6"
6262
ghost

server/src/integration/telegram.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { convertTextToAudio } from "./handlers/utils/audio-to-text";
99
import { FileFlavor, hydrateFiles } from "@grammyjs/files";
1010
import * as fs from "fs/promises";
1111
import { convertOggToWave } from "../utils/ffmpeg";
12+
import { telegramFormat } from "../utils/telegram-format";
1213
type DialoqBaseContext = FileFlavor<Context>;
1314
export default class TelegramBot {
1415
static get clients() {
@@ -73,9 +74,14 @@ export default class TelegramBot {
7374
user_id
7475
);
7576

76-
return await ctx.reply(message, {
77-
parse_mode: "MarkdownV2",
78-
});
77+
if (process.env.DB_TELEGEAM_PARSE_MODE === "normal") {
78+
return await ctx.reply(message);
79+
}
80+
81+
return await ctx.reply(telegramFormat(message),
82+
{
83+
parse_mode: "HTML",
84+
});
7985
});
8086

8187
bot.on("message:voice", async (ctx) => {
@@ -102,9 +108,15 @@ export default class TelegramBot {
102108
user_id
103109
);
104110

105-
return await ctx.reply(message, {
106-
parse_mode: "MarkdownV2",
107-
});
111+
112+
if (process.env.DB_TELEGEAM_PARSE_MODE === "normal") {
113+
return await ctx.reply(message);
114+
}
115+
116+
return await ctx.reply(telegramFormat(message),
117+
{
118+
parse_mode: "HTML",
119+
});
108120
} catch (error) {
109121
console.log(error);
110122
return await ctx.reply("Opps! Something went wrong");

server/src/internet/index.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ const searchProviders = {
117117

118118
export const searchInternet = async (embedding: Embeddings, { query }: { query: string }) => {
119119

120-
if(process.env.DISABLE_INTERNET_SEARCH == "true") {
120+
if (process.env.DISABLE_INTERNET_SEARCH == "true") {
121121
return [];
122122
}
123123

@@ -127,7 +127,9 @@ export const searchInternet = async (embedding: Embeddings, { query }: { query:
127127
}
128128
const datat = await searchProvider(query);
129129

130-
const results = datat.slice(0, TOTAL_RESULTS_LIMIT);
130+
const data = datat.filter((doc) => doc?.content.length > 0);
131+
132+
const results = data.slice(0, TOTAL_RESULTS_LIMIT)
131133

132134
const [docEmbeddings, queryEmbedding] = await Promise.all([
133135
embedding.embedDocuments(results.map((doc) => doc.content)),
server/src/utils/telegram-format.ts

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
// This code is a TypeScript conversion of the original Python code from https://github.com/Latand/formatter-chatgpt-telegram
2+
3+
/**
 * Escapes the characters `&`, `<` and `>` as HTML entities.
 *
 * `&` is handled first so that the ampersands introduced by the `&lt;` and
 * `&gt;` entities are not escaped a second time.
 *
 * @param text - Raw text to escape.
 * @returns The text with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
 */
function convertHtmlChars(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}
9+
10+
/**
 * Replaces every span wrapped in a Markdown delimiter with the matching HTML tag.
 *
 * The delimiter is only recognised when it is not glued to a word character on
 * its outer side (`(?<!\w)` before the opening one, `(?!\w)` after the closing
 * one). The `s` flag lets a span run across line breaks.
 *
 * @param outText - Text to transform.
 * @param mdTag - Markdown delimiter, e.g. `**`; it is regex-escaped before use.
 * @param htmlTag - HTML tag name (without angle brackets) to wrap the span in.
 * @returns The text with each delimited span rewritten as `<htmlTag>…</htmlTag>`.
 */
function splitByTag(outText: string, mdTag: string, htmlTag: string): string {
  const delimiter = escapeRegExp(mdTag);
  const tagPattern = new RegExp(
    `(?<!\\w)${delimiter}(.*?)${delimiter}(?!\\w)`,
    "gs"
  );
  return outText.replace(tagPattern, `<${htmlTag}>$1</${htmlTag}>`);
}
17+
18+
/**
 * Backslash-escapes regular-expression metacharacters so the input can be
 * embedded in a `RegExp` source and matched literally.
 *
 * @param string - Text to escape.
 * @returns The text with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegExp(string: string): string {
  // "$&" in the replacement re-inserts the matched character after the backslash.
  return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
21+
22+
/**
 * Appends a closing code delimiter when the text contains an unbalanced one.
 *
 * Triple-backtick fences are balanced first; the single-backtick count is then
 * taken on the already-extended text, so the appended fence is included in it.
 *
 * @param text - Text that may end inside an unterminated code span or fence.
 * @returns The text with "```" and/or "`" appended as needed.
 */
function ensureClosingDelimiters(text: string): string {
  const occurrences = (haystack: string, pattern: RegExp): number =>
    haystack.match(pattern)?.length ?? 0;

  let closed = text;
  if (occurrences(closed, /```/g) % 2 !== 0) {
    closed += "```";
  }
  if (occurrences(closed, /`/g) % 2 !== 0) {
    closed += "`";
  }
  return closed;
}
31+
32+
/**
 * Replaces every fenced code block with a placeholder token and returns the
 * HTML rendering of each block keyed by its placeholder.
 *
 * Unbalanced delimiters are closed first via `ensureClosingDelimiters`.
 * Placeholders have the form `CODEBLOCKPLACEHOLDER<n>`, numbered from 0 in
 * order of appearance.
 *
 * @param text - Text possibly containing fenced code blocks.
 * @returns A tuple of the text with placeholders substituted and a map from
 *   placeholder to `<pre><code>…</code></pre>` HTML. When the fence names a
 *   language, the `<code>` element gets a `language-<name>` class.
 */
function extractAndConvertCodeBlocks(text: string): [string, Record<string, string>] {
  const codeBlocks: Record<string, string> = {};
  let blockCount = 0;

  const withPlaceholders = ensureClosingDelimiters(text).replace(
    /```(\w*)?(\n)?(.*?)```/gs,
    (
      _fence: string,
      language: string | undefined,
      _newline: string | undefined,
      codeContent: string
    ): string => {
      const placeholder = `CODEBLOCKPLACEHOLDER${blockCount}`;
      blockCount += 1;
      // The language group is undefined (or empty) for a bare fence.
      codeBlocks[placeholder] = language
        ? `<pre><code class="language-${language}">${codeContent}</code></pre>`
        : `<pre><code>${codeContent}</code></pre>`;
      return placeholder;
    }
  );

  return [withPlaceholders, codeBlocks];
}
60+
61+
/**
 * Substitutes each placeholder in the text with its rendered code-block HTML.
 *
 * Only the first occurrence of each placeholder is replaced.
 *
 * @param text - Text containing placeholder tokens.
 * @param codeBlocks - Map from placeholder token to the HTML that replaces it.
 * @returns The text with the placeholders swapped for their HTML.
 */
function reinsertCodeBlocks(text: string, codeBlocks: Record<string, string>): string {
  let restored = text;
  for (const [placeholder, htmlCodeBlock] of Object.entries(codeBlocks)) {
    // A replacer function is used instead of a replacement string so that
    // "$$", "$&", "$1" etc. inside the code are inserted verbatim rather than
    // being interpreted as special replacement patterns.
    restored = restored.replace(placeholder, () => htmlCodeBlock);
  }
  return restored;
}
67+
68+
/**
 * Merges each run of consecutive lines starting with `>` into a single
 * `<blockquote>…</blockquote>` element.
 *
 * The leading `>` is removed and the remainder of each quoted line is trimmed;
 * the lines of one run are joined with newlines inside the element. All other
 * lines are passed through unchanged.
 *
 * @param text - Text possibly containing Markdown blockquote lines.
 * @returns The text with each blockquote run collapsed into one element.
 */
function combineBlockquotes(text: string): string {
  const combinedLines: string[] = [];
  let pendingQuote: string[] = [];

  // Emits the collected quote lines (if any) as one element and resets the run.
  const flushQuote = (): void => {
    if (pendingQuote.length > 0) {
      combinedLines.push(`<blockquote>${pendingQuote.join("\n")}</blockquote>`);
      pendingQuote = [];
    }
  };

  for (const line of text.split("\n")) {
    if (line.startsWith(">")) {
      pendingQuote.push(line.slice(1).trim());
      continue;
    }
    flushQuote();
    combinedLines.push(line);
  }
  // A quote that runs to the end of the text still has to be emitted.
  flushQuote();

  return combinedLines.join("\n");
}
98+
99+
/**
 * Turns entity-escaped blockquote tags back into real tags.
 *
 * @param output - Text in which `<blockquote>` / `</blockquote>` appear as
 *   `&lt;blockquote&gt;` / `&lt;/blockquote&gt;`.
 * @returns The text with those two escaped forms restored to literal tags.
 */
function removeBlockquoteEscaping(output: string): string {
  // The optional "/" is captured so one pattern covers opening and closing tags.
  return output.replace(/&lt;(\/?)blockquote&gt;/g, "<$1blockquote>");
}
104+
105+
/**
 * Converts Markdown-style text into HTML markup intended for sending with
 * `parse_mode: "HTML"`.
 *
 * Processing order matters:
 * 1. Blockquote lines are merged before escaping, because escaping would turn
 *    the leading `>` into `&gt;`.
 * 2. `&`, `<`, `>` are escaped.
 * 3. Fenced code blocks are swapped for placeholders so the inline rules do
 *    not touch their content.
 * 4. Inline code, emphasis, links, headings and list bullets are converted.
 * 5. Code blocks are re-inserted and the blockquote tags escaped in step 2 are
 *    restored.
 *
 * @param text - Markdown-style source text.
 * @returns The converted HTML string.
 */
export function telegramFormat(text: string): string {
  const escaped = convertHtmlChars(combineBlockquotes(text));
  const [withPlaceholders, codeBlocks] = extractAndConvertCodeBlocks(escaped);
  let output = withPlaceholders;

  // Inline code and the combined (triple-delimiter) emphasis forms go first so
  // the shorter delimiters below do not split them.
  output = output.replace(/`(.*?)`/g, "<code>$1</code>");
  output = output.replace(/\*\*\*(.*?)\*\*\*/g, "<b><i>$1</i></b>");
  output = output.replace(/___(.*?)___/g, "<u><i>$1</i></u>");

  output = splitByTag(output, "**", "b");
  output = splitByTag(output, "__", "u");
  output = splitByTag(output, "_", "i");
  output = splitByTag(output, "*", "i");
  output = splitByTag(output, "~~", "s");

  // Drop 【…】 bracketed spans. NOTE(review): delimiters restored from the
  // upstream Python formatter; the pattern must not match arbitrary text.
  output = output.replace(/【[^】]+】/g, "");
  // [label](url) and ![label](url) both become plain anchors.
  output = output.replace(/!?\[(.*?)\]\((.*?)\)/g, '<a href="$2">$1</a>');
  // Headings of any level are rendered as bold lines.
  output = output.replace(/^\s*#+ (.+)/gm, "<b>$1</b>");
  // "-" / "*" list markers become bullets, keeping their indentation.
  output = output.replace(/^(\s*)[-*] (.+)/gm, "$1• $2");

  output = reinsertCodeBlocks(output, codeBlocks);
  output = removeBlockquoteEscaping(output);

  return output;
}

0 commit comments

Comments
 (0)