|
// This code is a TypeScript conversion of the original Python code from the repo: https://github.com/Latand/formatter-chatgpt-telegram
| 2 | + |
/**
 * Escapes the characters `&`, `<` and `>` as the HTML entities `&amp;`,
 * `&lt;` and `&gt;`.
 *
 * @param text - Raw text that may contain HTML-significant characters.
 * @returns The text with those three characters replaced by entities.
 */
function convertHtmlChars(text: string): string {
  // `&` has to be handled first; otherwise the ampersands introduced by the
  // `&lt;` / `&gt;` entities would themselves be escaped a second time.
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Rewrites every `mdTag…mdTag` span as `<htmlTag>…</htmlTag>`.
 *
 * A delimiter only counts when it is not glued to a word character on its
 * outer side, and the span may cross line breaks (the `s` flag is set).
 *
 * @param outText - Text to transform.
 * @param mdTag - Markdown delimiter, e.g. `**`.
 * @param htmlTag - HTML tag name to emit, e.g. `b`.
 * @returns The text with matching spans wrapped in the HTML tag.
 */
function splitByTag(outText: string, mdTag: string, htmlTag: string): string {
  const delimiter = escapeRegExp(mdTag);
  const tagPattern = new RegExp(
    `(?<!\\w)${delimiter}(.*?)${delimiter}(?!\\w)`,
    "gs"
  );
  return outText.replace(tagPattern, `<${htmlTag}>$1</${htmlTag}>`);
}

/**
 * Backslash-escapes regular-expression metacharacters so the input can be
 * embedded in a `RegExp` source and matched literally.
 *
 * @param string - Text to escape.
 * @returns The escaped text.
 */
function escapeRegExp(string: string): string {
  return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Appends a closing delimiter when the text contains an odd number of
 * triple-backtick fences, and then again when it contains an odd number of
 * single backticks.
 *
 * @param text - Markdown text that may end inside an unterminated code span.
 * @returns The text with any missing closing delimiter appended.
 */
function ensureClosingDelimiters(text: string): string {
  let balanced = text;
  if ((balanced.match(/```/g) || []).length % 2 !== 0) {
    balanced += "```";
  }
  // Counted after the fence fix so the backticks just appended are included.
  if ((balanced.match(/`/g) || []).length % 2 !== 0) {
    balanced += "`";
  }
  return balanced;
}

/**
 * Replaces each fenced code block with a `CODEBLOCKPLACEHOLDER<n>` token and
 * records the `<pre><code>` HTML for it, so that later inline-markdown
 * substitutions do not touch code.
 *
 * @param text - Markdown text.
 * @returns A tuple of the text with placeholders in place of code blocks and
 *   a map from placeholder to the HTML of the corresponding block.
 */
function extractAndConvertCodeBlocks(
  text: string
): [string, Record<string, string>] {
  const codeBlocks: Record<string, string> = {};
  let blockCount = 0;

  const withPlaceholders = ensureClosingDelimiters(text).replace(
    /```(\w*)?(\n)?(.*?)```/gs,
    (
      _match: string,
      language: string | undefined,
      _newline: string | undefined,
      codeContent: string
    ): string => {
      const placeholder = `CODEBLOCKPLACEHOLDER${blockCount++}`;
      codeBlocks[placeholder] = language
        ? `<pre><code class="language-${language}">${codeContent}</code></pre>`
        : `<pre><code>${codeContent}</code></pre>`;
      return placeholder;
    }
  );

  return [withPlaceholders, codeBlocks];
}

/**
 * Substitutes the first occurrence of every placeholder with its recorded
 * HTML code block.
 *
 * @param text - Text containing placeholders.
 * @param codeBlocks - Map from placeholder to HTML.
 * @returns The text with the code blocks put back.
 */
function reinsertCodeBlocks(
  text: string,
  codeBlocks: Record<string, string>
): string {
  let restored = text;
  for (const [placeholder, htmlCodeBlock] of Object.entries(codeBlocks)) {
    // A replacer function inserts the HTML verbatim. A plain replacement
    // string would interpret `$$`, `$&`, `` $` `` and `$'` inside the code.
    restored = restored.replace(placeholder, () => htmlCodeBlock);
  }
  return restored;
}

/**
 * Merges each run of consecutive lines starting with `>` into a single
 * `<blockquote>…</blockquote>` element. The leading `>` is removed and each
 * quoted line is trimmed; other lines are passed through unchanged.
 *
 * @param text - Markdown text.
 * @returns The text with blockquote runs wrapped in `<blockquote>` tags.
 */
function combineBlockquotes(text: string): string {
  const combinedLines: string[] = [];
  let blockquoteLines: string[] = [];

  const flushBlockquote = (): void => {
    if (blockquoteLines.length > 0) {
      combinedLines.push(
        `<blockquote>${blockquoteLines.join("\n")}</blockquote>`
      );
      blockquoteLines = [];
    }
  };

  for (const line of text.split("\n")) {
    if (line.startsWith(">")) {
      blockquoteLines.push(line.slice(1).trim());
    } else {
      flushBlockquote();
      combinedLines.push(line);
    }
  }
  // The text may end while still inside a quote.
  flushBlockquote();

  return combinedLines.join("\n");
}

/**
 * Turns entity-escaped blockquote tags (`&lt;blockquote&gt;` and
 * `&lt;/blockquote&gt;`) back into real tags.
 *
 * @param output - Text in which the blockquote tags have been escaped.
 * @returns The text with the blockquote tags restored.
 */
function removeBlockquoteEscaping(output: string): string {
  return output
    .replace(/&lt;blockquote&gt;/g, "<blockquote>")
    .replace(/&lt;\/blockquote&gt;/g, "</blockquote>");
}

/**
 * Converts markdown-style text into HTML markup: blockquotes, fenced and
 * inline code, bold/italic/underline/strikethrough, links, headings and
 * bullet lists. `&`, `<` and `>` in the input are entity-escaped.
 *
 * @param text - Markdown-style input.
 * @returns The HTML-formatted text.
 */
export function telegramFormat(text: string): string {
  // Blockquotes are wrapped before escaping (the `>` marker must still be
  // visible); their tags get escaped with everything else and are restored
  // in the last step.
  const escaped = convertHtmlChars(combineBlockquotes(text));

  // Everything between here and reinsertCodeBlocks operates on text whose
  // fenced code has been swapped for placeholders.
  const [withoutCode, codeBlocks] = extractAndConvertCodeBlocks(escaped);
  let output = withoutCode;

  // Bullets are converted before emphasis: the `*` italic rule can span
  // lines and would otherwise consume the markers of a `* item` list.
  output = output.replace(/^(\s*)[\-\*] (.+)/gm, "$1• $2");

  output = output.replace(/`(.*?)`/g, "<code>$1</code>");
  output = output.replace(/\*\*\*(.*?)\*\*\*/g, "<b><i>$1</i></b>");
  output = output.replace(/\_\_\_(.*?)\_\_\_/g, "<u><i>$1</i></u>");

  // Two-character delimiters go before their one-character counterparts.
  output = splitByTag(output, "**", "b");
  output = splitByTag(output, "__", "u");
  output = splitByTag(output, "_", "i");
  output = splitByTag(output, "*", "i");
  output = splitByTag(output, "~~", "s");

  // Drop 【…】 bracketed spans.
  output = output.replace(/【[^】]+】/g, "");

  // [label](url) and ![label](url) both become anchors. A double quote in
  // the URL is escaped so it cannot terminate the href attribute.
  output = output.replace(
    /!?\[(.*?)\]\((.*?)\)/g,
    (_match: string, label: string, href: string): string =>
      `<a href="${href.replace(/"/g, "&quot;")}">${label}</a>`
  );

  output = output.replace(/^\s*#+ (.+)/gm, "<b>$1</b>");

  output = reinsertCodeBlocks(output, codeBlocks);
  return removeBlockquoteEscaping(output);
}
0 commit comments